⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 epc2d.c

📁 一个投影聚类算法及其数据集生成源码。 参考文献: Eric K.K. Ng, A. Fu : Efficient algorithm for Projected Clustering,
💻 C
📖 第 1 页 / 共 4 页
字号:
/*      EPC2                implementationimage.data image.out image.res 7 20
ion.data ion.out ion.res 7 20        Program         :       EPC2        Programmer      :       Ng Ka Ka        Purpose         :       To use EPC2 approach for Projective Clustering        Usage           :       cluster <datafile> <outputfile> <max_no_cluster)        Status          :       version 2.3        Bug Fixed       :       1. when the no. of actual nodes is smaller than the expect no. of nodes after merge_sort, a bug                                        invovles invalid memory copying                                   Fixed to handle this general case                                2. don't remember which bug i have fixed        Changes         :       1. set the density threshold to 1SD instead of 2SD ?? no change....                                2. command line parameter max_no_cluster                                3. Change influence to                                        1 2 3 4 3 2 1                                4. Change signal-to-noise ration from 10 to 5                                5. Set THRESHOLD to 2				6. Add 2D Implementation					add data structures to handle 2D implementation					add functions to handle 2D implementation					change influence to 								1					    1   2   1					1   2   3   2   1					    1   2   1						1					set threshold to 7 (around 5-8 is better)				10. 	Use real histogram (do not add influence to neighbor)					set NO_INTERVAL to 40					threshold should be around 5				11.	distinguish and detect signature_bit with conflicting id_plane				12.	Add detecting 45, 135, 225, 315 degree in detectdense_2d				13.	set outlier to be <0.1 likeness				14.	modify combine_signature_bit_list						compare each two signature entries and calculate their similarity						similarity > 0.6 would be merged						continue the merging until no combining in an iteration						(Remark: reduce the number of simlarity computation by examing pairs of signature entries with diff dimension < nodimension/5 to speed up)				15.	measure the user CPU time				16.	modify combine_signature_bit_list						in each iteration							copmare every pairs of signature entries, records the pair with the highest similarity							merge the pair							until the highest similarity is lower than let say 0.3        Final Edit Date :       26/8/02*/#include <stdio.h>#include <stdlib.h>#include <math.h>
#include <string.h>
#ifndef WIN32#include <sys/resource.h>#endif
/* Defined internal Parameters *//* This is the suggested values, changing the values can have (good or bad) effect on clustering quality and time */#define NO_INTERVAL 20			/* should be enough for general purpose */#define NO_SLOT 1			/* -------------------------------------------------------------*//* Data Structure 						*/typedef struct	{	float *component;	int *signature;	int *signature_2d;	int clust_assg;}	Point;typedef struct {	long nopoint;	int nodimension;	Point *point;}	Dataset;typedef struct {	int *dense_id;	int *density;	int no_dense;	int maxid;}	Histogram_2d;typedef struct tInterval_2D *Interval_2Dptr;typedef struct tInterval_2D{	int no_point;	int mid1;	int left1;	int right1;	int mid2;	int left2;	int right2;	Interval_2Dptr next;	Interval_2Dptr previous;}	Interval_2D ;typedef struct {	int no_dimension;	int *dimension;	float *locationleft;	float *locationright;}	Cluster;typedef struct {	int no_plane;	int *dimension1;	int *dimension2;	int *dense_id;}	Cluster_2d;typedef struct tID_Plane *ID_Planeptr;typedef struct tID_Plane{	int dimension1;	int dimension2;	int no_id;	int *id;	ID_Planeptr next;}	ID_Plane;typedef struct tSignature_Bit *Signature_Bitptr;typedef struct tSignature_Bit{	float score;	int no_id_plane;	unsigned int *bitmap;	ID_Planeptr next_plane;	Signature_Bitptr next_signature;}	Signature_Bit;typedef struct {	int no_signature;	Signature_Bitptr next;}	Signature_Bit_List;/* End Data Structure 						*//* -------------------------------------------------------------*//* ------------------- Function Prototypes ---------------------*//* Math Functions */long factorial(int m, int n);long combination (int m, int n);/* Signature Bit Data Structure Handling Functions */Signature_Bit *derive_signature_bit (const int *signature, int nodimension, int no_entry);void insert_signature_bit (Signature_Bit_List *signature_bit_list, Signature_Bit *signature_bit, int nodimension);void insert_signature_bit_score (Signature_Bit_List *signature_bit_list, Signature_Bit *signature_bit);int eq_signature_bit (Signature_Bit *sig1, Signature_Bit *sig2);int insert_id_plane (int dimension1, int dimension2, int id, ID_Planeptr id_planeptr);int eq_id_plane (ID_Planeptr target_planeptr, ID_Planeptr src_planeptr);int compare_signature_bit_score (Signature_Bitptr sig1, Signature_Bitptr sig2);int compare_signature_bit (Signature_Bitptr sig1, Signature_Bitptr sig2, int nodimension);Signature_Bitptr similar_signatures (Signature_Bit *sig1, Signature_Bit *sig2, float *similarity);int common_id_plane (Signature_Bit *sig1, Signature_Bit *sig2, int *uncommon1, int *uncommon2, ID_Planeptr the);int compare_id_plane (ID_Planeptr plane1, ID_Planeptr plane2);int diff_signature_bitmap (int *sig1, int *sig2, int nodimension);int subset_signature_bitmap (int *sig1, int *sig2, int nodimension);void combine_signature_bit_list (FILE *fp, Signature_Bit_List *signature_bit_list, int nodimension);void sort_signature_bit_score (Signature_Bit_List *original, Signature_Bit_List *result);void refine_signature_bit_list (Signature_Bit_List *signature_bit_list, int no_left);void remove_signature_bit (Signature_Bit_List *signature_bit_list, int position) ;void free_id_plane (ID_Planeptr the);void print_signature_bit (FILE *fp, Signature_Bitptr signature_bit, int nodimension);void print_signature_bit_list (FILE *fp, Signature_Bit_List *signature_bit_list, int nodimension);/* Signature_2D functions */void derive_Signature_2d (Dataset *dataset, Histogram_2d *histogram, float left, float right);void print_Signature_2d (Dataset *dataset, int no_entry, Histogram_2d *histogram);/* Point Data structure Handling Functions */void readin_Points(FILE *, Dataset *);void print_Points(Dataset *);/* Histogram and Dense Regions Functions */void gen_histogram_2d(FILE *, Dataset *, float, float, Histogram_2d *);float find_threshold_histogram_2d (FILE *output, Histogram_2d *histogram, int histogram_id, int iteration);void countdense_2d (FILE *output, Histogram_2d *histogram, int histogram_id);void searchdense_2d (FILE *output, Histogram_2d *histogram, int i, int j, int histogram_id, int original, float threshold);void detectdense_2d (FILE *output, Histogram_2d *histogram, int histogram_id, float threshold, int *current_id);void print_histogram_2d (FILE *, Dataset *, Histogram_2d *, int);/* Assigning and Clustering Functions */void assign_Clusters_2d (Signature_Bit_List *signature_bit_list, Cluster_2d *theclusters, int nodimension);void assign_Points_2d (FILE *fp, Dataset *dataset, Cluster_2d *clusters_2d, int no_cluster, Histogram_2d *histogram, int left_margin, int right_margin);void print_clustasg(FILE *output, Dataset *dataset, float left_margin, float right_margin, int no_cluster);void print_clustconfig_2d(FILE *out, Cluster_2d *cluster, int nocluster);/* general utility */int dense_id_2_base (int, int, Histogram_2d *);void dense_id_2_dim (int, int, int *, int *);/* --------------------------------------------------------------*//* Global Variable */int THRESHOLD; /* to be input as command line argument *//*---------------------------------------------------------------------        Function        :       factorial        Purpose         :       to calculate the factorial m / factorial n  ----------------------------------------------------------------------*/long factorial(int m, int n){	long result=1;	int i;	for (i=m; i>n; i--)			result *= i;	return result;}/*---------------------------------------------------------------------        Function        :       combination        Purpose         :       to calculate mCn by calling factorial  ----------------------------------------------------------------------*/long combination (int m, int n){	long result;	result = factorial (m, m-n)/factorial (n, 1);	return result;}	/*------------------------------------------------------------------------	Function	:	print_signature_bit	Purpose		:	to print the given signature bit  ----------------------------------------------------------------------*/void print_signature_bit (FILE *fp, Signature_Bitptr signature_bit, int nodimension){	int i, j;	int position, WordNo;	ID_Planeptr current_id_plane;	fprintf (fp, "%.3f\t", signature_bit->score);	for (i=0; i<nodimension; i++)	{		position = i %32;		WordNo = i /32;		if (((signature_bit->bitmap[WordNo]>>position) & 1) == 1)	{			fprintf (fp, "%d ", i);		}	}	fprintf (fp, "\n");	current_id_plane = signature_bit->next_plane;	for (i=0; i<signature_bit->no_id_plane; i++)	{		current_id_plane = current_id_plane->next;		fprintf (fp, "[%d, %d]\t", current_id_plane->dimension1, current_id_plane->dimension2);		for (j=0; j<current_id_plane->no_id; j++)	{			fprintf (fp, "%d ", current_id_plane->id[j]);		}		fprintf (fp, "\n");	}}/*------------------------------------------------------------------------	Function	:	print_signature_bit_list	Purpose		:	to print the given signature bit list  ----------------------------------------------------------------------*/void print_signature_bit_list (FILE *fp, Signature_Bit_List *signature_bit_list, int nodimension){	int i, j;	Signature_Bitptr current_signature;		fprintf (fp, "There are %d signature bits \n", signature_bit_list->no_signature);	current_signature = signature_bit_list->next;	for (i=0; i<signature_bit_list->no_signature; i++)	{		current_signature = current_signature->next_signature;		print_signature_bit (fp, current_signature, nodimension);	}}	/*------------------------------------------------------------------------	Function	:	compare_signature_bit_score	Purpose		:	to compare two given signature bit	Return		:	1 if score of sig1 is smaller than sig2, -1 if score of sig 2 is smaller than sign1, 0 if equal  ----------------------------------------------------------------------*/int compare_signature_bit_score (Signature_Bitptr sig1, Signature_Bitptr sig2){	if (sig1->score < sig2->score)		return 1;	else if (sig1->score > sig2->score)		return -1;	else	return 0;}			/*------------------------------------------------------------------------	Function	:	compare_signature_bit	Purpose		:	to compare the "bitmap" of two given signature bit	Return		:	-1 if score of sig1 is smaller than sig2, 1 if score of sig 2 is smaller than sign1, 0 if equal  ----------------------------------------------------------------------*/int compare_signature_bit (Signature_Bitptr sig1, Signature_Bitptr sig2, int nodimension){	int no_int, i;	no_int = nodimension / 32;	if (no_int*32<nodimension)		no_int++;	for (i=0; i<no_int; i++)	{		if (sig1->bitmap[i]>sig2->bitmap[i])			return 1;		else if (sig1->bitmap[i]<sig2->bitmap[i])			return -1;	}	return 0;}	/*------------------------------------------------------------------------	Function	:	insert_signautre_bit	Purpose		:	to insert a signature_bit into a signature_bit_list according to the bit value  ----------------------------------------------------------------------*/	void insert_signature_bit (Signature_Bit_List *signature_bit_list, Signature_Bit *signature_bit, int nodimension){	Signature_Bit *current_signature;	ID_Planeptr current_plane;	int i, j;	int compare_result;	int inserted=0;	current_signature = signature_bit_list->next;	/* transverse the signature_bit_list */		for (i=0; i<signature_bit_list->no_signature; i++)	{		/* find the position where the signature_bit should be inserted into the linked list (if next signature in the list is smaller than the signature to be inserted)*/		compare_result = compare_signature_bit (current_signature->next_signature, signature_bit, nodimension);		if (compare_result==1)	{			signature_bit->next_signature = current_signature->next_signature;			current_signature->next_signature = signature_bit;			signature_bit_list->no_signature++;			inserted=1;			break;		}		/* the signature_bit already exist in the linked list */		else if (compare_result == 0)	{			/* if the already exist signature_bit contains no conflict id_plane with signature_bit */			if (eq_signature_bit (current_signature->next_signature, signature_bit) == 1)	{				/* insert the id_plane into the existing signature bit */				current_plane = signature_bit->next_plane;							for (j=0; j<signature_bit->no_id_plane; j++)	{					if ((insert_id_plane (current_plane->next->dimension1, current_plane->next->dimension2, current_plane->next->id[0], current_signature->next_signature->next_plane)) == 1)						current_signature->next_signature->no_id_plane++;					current_plane = current_plane->next;				}				/* add the score to the existing signature bit */				current_signature->next_signature->score += signature_bit->score;				inserted=1;				break;			}						/* if the already exist signature_bit contains conflict id_plane with signature_bit, continue to test next signature */			else {				;			}		}		/* continue to search for the location */		current_signature = current_signature->next_signature;	}	/* case if transverse to the end of the signature_bit_list, insert signature_bit into the end */	if (inserted==0)	{		current_signature->next_signature = signature_bit;		signature_bit->next_signature = NULL;		signature_bit_list->no_signature++;	}		}int eq_signature_bit (Signature_Bit *sig1, Signature_Bit *sig2){	ID_Planeptr current_plane;	int answer=1;	current_plane = sig2->next_plane;	while (current_plane->next != NULL)	{		if (eq_id_plane (sig1->next_plane, current_plane->next)==1)	{			current_plane = current_plane->next;		}		else	{			answer = 0;			break;		}	}	return answer;}	/*------------------------------------------------------------------------	Function	:	sort_signature_bit_score	Purpose		:	to sort signature_bit in signature_bit_list in descending order of score   ----------------------------------------------------------------------*/	void sort_signature_bit_score (Signature_Bit_List *original, Signature_Bit_List *result){		Signature_Bit *current_signature=original->next;	int i;	for (i=0; i<original->no_signature; i++)	{		current_signature = current_signature->next_signature;		insert_signature_bit_score(result, current_signature);	}}	/*------------------------------------------------------------------------	Function	:	insert_signautre_bit_score	Purpose		:	to insert a signature_bit into a signature_bit_list according to the score value	Called_by	:	sort_signature_bit_score  ----------------------------------------------------------------------*/		void insert_signature_bit_score (Signature_Bit_List *signature_bit_list, Signature_Bit *signature_bit){	Signature_Bit *current_signature;	Signature_Bit *new_signature;	int compare_result;	int inserted=0;	int i;	new_signature = (Signature_Bit *)malloc (sizeof (Signature_Bit));	memcpy (new_signature, signature_bit, sizeof (Signature_Bit));	current_signature = signature_bit_list->next;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -