⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 epc2d.c

📁 一个投影聚类算法及其数据集生成源码。 参考文献: Eric K.K. Ng, A. Fu : Efficient algorithm for Projected Clustering,
💻 C
📖 第 1 页 / 共 4 页
字号:
				else 	return 0;			}			/* return TRUE if there are no matching id plane */			else return 1;}	int insert_id_plane (int dimension1, int dimension2, int id, ID_Planeptr id_planeptr){	ID_Planeptr temp_id_planeptr;	int return_value; 	int i;	int found=0;				/* transverse the id_plane linked list until a suitable place to insert the new id_plane */			while ((id_planeptr->next != NULL) && (id_planeptr->next->dimension1<dimension1))				id_planeptr = id_planeptr->next;			while ((id_planeptr->next != NULL) && (id_planeptr->next->dimension2<dimension2) && (id_planeptr->next->dimension1<=dimension1))				id_planeptr = id_planeptr->next;			/* the new id_plane already exist in the linked list */			if (id_planeptr->next != NULL)				if ((id_planeptr->next->dimension1 == dimension1) && (id_planeptr->next->dimension2 == dimension2))	{				return_value = 0;				/* case the id does not match, add a new id */				for (i=0; i<id_planeptr->next->no_id; i++)	{						if (id_planeptr->next->id[i] == id)	{						found=1;						break;					}					}				if (found != 1)	{					id_planeptr->next->no_id++;					id_planeptr->next->id = (int *)realloc (id_planeptr->next->id, sizeof (int) * id_planeptr->next->no_id);					id_planeptr->next->id[id_planeptr->next->no_id-1] = id;				}				/* case the id matches, do nothing */				return (return_value);			}						/* the new id_plane does not exist in the linked list */			{				return_value = 1;				/* allocate the structure to store the new id plane */				temp_id_planeptr = (ID_Planeptr) malloc(sizeof (ID_Plane));				temp_id_planeptr->dimension1 = dimension1;				temp_id_planeptr->dimension2 = dimension2;				temp_id_planeptr->no_id = 1;				temp_id_planeptr->id = (int *)malloc (sizeof (int));				temp_id_planeptr->id[0] = id;						/* case for the reaching the last element of the linked list */						if (id_planeptr->next == NULL)	{					temp_id_planeptr->next = NULL;				}				/* case for not reaching the last element of the linked list */				else	{					temp_id_planeptr->next = id_planeptr->next;				}				id_planeptr->next = temp_id_planeptr;				return (return_value);			}	return (return_value);	/* 1 for a new id_plane node is inserted, 0 otherwise */}/*---------------------------------------------------------------------        Function        :       readin_Points        Purpose         :       To read the data point information from the file and store in the Dataset structure  ----------------------------------------------------------------------*/void readin_Points(FILE *datafile, Dataset *dataset){	long id;					/* temp. store the id of point */        long i;                                         /* For looping index */        int j;                                          /* For looping index */        char buffer[10000];                              /* Buffer for reading data */        int comment;                                    /* indicate whether the line of readin stream is comment or not */        /* Read the first four line of data file (no_point, dimension, left_margin, right_margin) */        fgets(buffer, 10000, datafile);        fgets(buffer, 10000, datafile);        fgets(buffer, 10000, datafile);        fgets(buffer, 10000, datafile);        /* For each data point */        for (i=0; i<dataset->nopoint; i++)        {
                /* check to see if the following line is comment or not */                comment = getc(datafile);                if (comment == '/')	{                        fgets(buffer, 1000, datafile);				i--;		}                else                        ungetc(comment, datafile);                /* get the point id */                fscanf(datafile, "%ld", &id);                /* For each dimension */                for (j=0; j<dataset->nodimension; j++)                        /* Fill in the value to memory */                        fscanf(datafile, "%f", &(dataset->point[id].component[j]));                fgets(buffer, 1000, datafile);          /*get the \n at the end of each line*/        }}/*---------------------------------------------------------------------        Function        :       print_Points (debug function)        Purpose         :       To print the data point information to stdout  ----------------------------------------------------------------------*/void print_Points(Dataset *dataset){	long i;	int j;	printf ("No Point %d\n", dataset->nopoint);	printf ("Dimension %d\n", dataset->nodimension);	for (i=0; i<dataset->nopoint; i++)	{		printf ("Point %ld\t%d\t\t", i, dataset->point[i].clust_assg);		for (j=0; j<dataset->nodimension; j++)				printf ("%.3f\t", dataset->point[i].component[j]);		printf ("\n");	}}/*---------------------------------------------------------------------        Function        :       gen_histogram (Histogram Generation)        Purpose         :       Generate D 1-d Histograms in each dimension for all data points        Algorithm       :       For each point					For each dimension						locate the interval where the point falls into 						add the influence of the point to the interval it locates and its neighbourhood  ----------------------------------------------------------------------*/void gen_histogram_2d(FILE *output, Dataset *dataset, float left_margin, float right_margin, Histogram_2d *histogram){ 	long i;	int j, k;	int m, n;	int interval1, interval2;	int influence_width=0;	long test=0;		float interval_length;	/* calculate the corresponding interval length */	interval_length = (right_margin-left_margin)/NO_INTERVAL;	/* For each point and its dimension, add its influence to the corresponding Histogram slot */	for (i=0; i<dataset->nopoint; i++)	for (j=0; j<dataset->nodimension; j++)	{		/* find interval where the point locate */		interval1 = (dataset->point[i].component[j]-left_margin)/interval_length;			/* handle the case coordinate = margin */		if (interval1==NO_INTERVAL)			interval1--;		else if (interval1<0)			interval1++;		for (k=j+1; k<dataset->nodimension; k++)	{			/* find interval where the point locate */			interval2 = (dataset->point[i].component[k]-left_margin)/interval_length;			/* handle the case coordinate = margin */			if (interval2==NO_INTERVAL)				interval2--;			else if (interval2<0)				interval2++;			/* m is the vertical offset */                	for (m=influence_width; m>=0; m--)    {				/* n is the horizontal offset */ 	                	for (n=0; n<=influence_width-m; n++)  {				    if (m!=0)	{                                	if (interval1+m<NO_INTERVAL)	{					    if (n!=0)	{						if (interval2+n<NO_INTERVAL)	{	                                       		histogram[j*dataset->nodimension+k].density[(interval1+m)*NO_INTERVAL+interval2+n] += (influence_width+1-m-n);						}					    }						if (interval2-n>=0)	{	                                       		histogram[j*dataset->nodimension+k].density[(interval1+m)*NO_INTERVAL+interval2-n] += (influence_width+1-m-n);						}					}				    }					if (interval1-m>=0)	{					    if (n!=0)	{						if (interval2+n<NO_INTERVAL)	{	                                      		histogram[j*dataset->nodimension+k].density[(interval1-m)*NO_INTERVAL+interval2+n] += (influence_width+1-m-n);						}					    }						if (interval2-n>=0)	{	                                       		histogram[j*dataset->nodimension+k].density[(interval1-m)*NO_INTERVAL+interval2-n] += (influence_width+1-m-n);						}					}				}			}		}	}}/*---------------------------------------------------------------------        Function        :       print_histogram_2d        Purpose         :       To print the density of each interval in all built histograms  ----------------------------------------------------------------------*/void print_histogram_2d (FILE *fp, Dataset *dataset, Histogram_2d *histogram, int histogram_id){	int m, n;	int dimension1, dimension2;	dense_id_2_dim (histogram_id, dataset->nodimension, &dimension1, &dimension2);			fprintf (fp, "Dense Region on Dimensions %d %d\n", dimension1, dimension2);			for (m=0; m<NO_INTERVAL; m++)	{				for (n=0; n<NO_INTERVAL; n++)	{					fprintf (fp, "%2d", histogram[histogram_id].dense_id[m*NO_INTERVAL+n]);				}				fprintf (fp, "\n");			}}float find_threshold_histogram_2d (FILE *output, Histogram_2d *histogram, int histogram_id, int iteration){	float threshold;	int i;	int no_non_dense=0;	float mean=0.0;	float sd = 0;		for (i=0; i<NO_INTERVAL*NO_INTERVAL; i++)	{		if (histogram[histogram_id].dense_id[i] == 0)	{			mean += histogram[histogram_id].density[i];			no_non_dense++;		}	}	mean /= no_non_dense;	for (i=0; i<NO_INTERVAL*NO_INTERVAL; i++)	{		if (histogram[histogram_id].dense_id[i] == 0)	{			sd += pow ((histogram[histogram_id].density[i]-mean), 2);		}	}	sd /= no_non_dense;	sd = sqrt (sd);	threshold = mean + (float) THRESHOLD * sd;	//	printf ("mean %.3f, sd %.3f , threshold %.3f \n", mean, sd, threshold);	return (threshold);}	void countdense_2d (FILE *output, Histogram_2d *histogram, int histogram_id){	int m, n, i;	int count[100];	int found=0;
#ifndef WIN32	bzero (count, sizeof(int) * 100);#else
	memset(count, 0, sizeof(int)*100);
#endif
	for (m=0; m<NO_INTERVAL; m++)	{		for (n=0; n<NO_INTERVAL; n++)	{			found=0;			if (histogram[histogram_id].dense_id[m*NO_INTERVAL+n] != 0)	{				if (histogram [histogram_id].maxid < histogram[histogram_id].dense_id[m*NO_INTERVAL+n])					histogram[histogram_id].maxid = histogram[histogram_id].dense_id[m*NO_INTERVAL+n];				for (i=0; count[i]!=0; i++)	{					if (count[i] == histogram[histogram_id].dense_id[m*NO_INTERVAL+n])	{						found = 1;						break;					}				}				if (found == 0)					count[i] = histogram[histogram_id].dense_id[m*NO_INTERVAL+n];			}		}	}	for (i=0; count[i]!=0; i++)		;	histogram[histogram_id].no_dense=i;}void searchdense_2d (FILE *output, Histogram_2d *histogram, int i, int j, int histogram_id, int original, float threshold){	int position;		position = j*NO_INTERVAL+i;	/* go right */	if (i<NO_INTERVAL-1)		if ((histogram[histogram_id].density[position+1]>=threshold) && (histogram[histogram_id].dense_id[position+1]<histogram[histogram_id].dense_id[position]))	{			histogram[histogram_id].dense_id[position+1] = histogram[histogram_id].dense_id[position];			searchdense_2d (output, histogram, i+1, j, histogram_id, original, threshold);		}	/* go left */	if (i>0)		if ((histogram[histogram_id].density[position-1]>=threshold) && (histogram[histogram_id].dense_id[position-1]<histogram[histogram_id].dense_id[position]))	{			histogram[histogram_id].dense_id[position-1] = histogram[histogram_id].dense_id[position];			searchdense_2d (output, histogram, i-1, j, histogram_id, original, threshold);		}	/* go down */	if (j<NO_INTERVAL-1)		if ((histogram[histogram_id].density[position+NO_INTERVAL]>=threshold) &&  (histogram[histogram_id].dense_id[position+NO_INTERVAL]<histogram[histogram_id].dense_id[position]))	{			histogram[histogram_id].dense_id[position+NO_INTERVAL] = histogram[histogram_id].dense_id[position];			searchdense_2d (output, histogram, i, j+1, histogram_id, original, threshold);		}	/* go up */	if (j>0)		if ((histogram[histogram_id].density[position-NO_INTERVAL]>=threshold) &&  (histogram[histogram_id].dense_id[position-NO_INTERVAL]<histogram[histogram_id].dense_id[position]))	{			histogram[histogram_id].dense_id[position-NO_INTERVAL] = histogram[histogram_id].dense_id[position];			searchdense_2d (output, histogram, i, j-1, histogram_id, original, threshold);		}	/* go 45 degree up right */	if ((i<NO_INTERVAL-1) && (j>0))		if ((histogram[histogram_id].density[position-NO_INTERVAL+1]>=threshold) &&  (histogram[histogram_id].dense_id[position-NO_INTERVAL+1]<histogram[histogram_id].dense_id[position]))	{			histogram[histogram_id].dense_id[position-NO_INTERVAL+1] = histogram[histogram_id].dense_id[position];			searchdense_2d (output, histogram, i+1, j-1, histogram_id, original, threshold);		}	/* go 135 degree down right*/	if ((i<NO_INTERVAL-1) && (j<NO_INTERVAL-1))		if ((histogram[histogram_id].density[position+NO_INTERVAL+1]>=threshold) &&  (histogram[histogram_id].dense_id[position+NO_INTERVAL+1]<histogram[histogram_id].dense_id[position]))	{			histogram[histogram_id].dense_id[position+NO_INTERVAL+1] = histogram[histogram_id].dense_id[position];			searchdense_2d (output, histogram, i+1, j+1, histogram_id, original, threshold);		}	/* go 225 degree down left */	if ((i>0) && (j<NO_INTERVAL-1))		if ((histogram[histogram_id].density[position+NO_INTERVAL-1]>=threshold) &&  (histogram[histogram_id].dense_id[position+NO_INTERVAL-1]<histogram[histogram_id].dense_id[position]))	{			histogram[histogram_id].dense_id[position+NO_INTERVAL-1] = histogram[histogram_id].dense_id[position];			searchdense_2d (output, histogram, i-1, j+1, histogram_id, original, threshold);		}	/* go 315 degree up left */	if ((i>0) && (j>0))		if ((histogram[histogram_id].density[position-NO_INTERVAL-1]>=threshold) &&  (histogram[histogram_id].dense_id[position-NO_INTERVAL-1]<histogram[histogram_id].dense_id[position]))	{			histogram[histogram_id].dense_id[position-NO_INTERVAL-1] = histogram[histogram_id].dense_id[position];			searchdense_2d (output, histogram, i-1, j-1, histogram_id, original, threshold);		}}void detectdense_2d (FILE *output, Histogram_2d *histogram, int histogram_id, float threshold, int *current_id){	int x, y;	for (y=0; y<NO_INTERVAL; y++)			for (x=0; x<NO_INTERVAL; x++)							if ((histogram[histogram_id].density[y*NO_INTERVAL+x]>=threshold) && (histogram[histogram_id].dense_id[y*NO_INTERVAL+x] == 0))	{				histogram[histogram_id].dense_id[y*NO_INTERVAL+x] = (*current_id)++;				searchdense_2d (output, histogram, x, y, histogram_id, 0, threshold);			}	}	/*---------------------------------------------------------------------        Function        :       derive_Signature_2d        Purpose         :       Given the located desne regions, derive the signatures for all datapoints 	Algorithm	: 	For each datapoint					For each dimension						check in which interval does the datapoint locate						assign the value to the appropriate entry of the signature of the datapoint				argument left/right is the left/right margin of the space  ----------------------------------------------------------------------*/void derive_Signature_2d (Dataset *dataset, Histogram_2d *histogram, float left, float right){	long n;	int j, k;	int m;	int interval1, interval2;	int entry_no;	float interval_length = (right-left)/NO_INTERVAL;	/* for each datapoint */		for (n=0; n<dataset->nopoint; n++)	{		for (j=0; j<dataset->nodimension; j++)	{			interval1 = (dataset->point[n].component[j]-left)/interval_length;			/* to avoid the right boundary problem */			if (interval1 == NO_INTERVAL)				interval1--;			for (k=j+1; k<dataset->nodimension; k++)	{				interval2 = (dataset->point[n].component[k]-left)/interval_length;				/* to avoid the right boundary problem */				if (interval2 == NO_INTERVAL)					interval2--;				entry_no = 0;				for (m=dataset->nodimension-1; m>=dataset->nodimension-j; m--)						entry_no += m;				entry_no += (k-j);				entry_no--; 				dataset->point[n].signature_2d[entry_no] = histogram[j*dataset->nodimension+k].dense_id[interval1*NO_INTERVAL+interval2];			}		}	}}void print_Signature_2d (Dataset *dataset, int no_entry, Histogram_2d *histogram){	int i;	long n;	int *base;	int dimension1, dimension2;	int noitem;			base = (int *)malloc (sizeof (int) * no_entry);
#ifndef WIN32	bzero (base, sizeof (int) * no_entry);#else
	memset(base, 0, sizeof(int)*no_entry);
#endif
	for (i=0; i<no_entry-1; i++)	{		dense_id_2_dim(i, dataset->nodimension, &dimension1, &dimension2);//		printf ("(%d)(%d)(%d)%d ", i, dimension1, dimension2, histogram[dimension1*dataset->nodimension+dimension2].no_dense);		base [i+1] = base[i]+histogram[dimension1*dataset->nodimension+dimension2].maxid+1;	}//	printf ("\n");		for (i=0; i<no_entry; i++)	{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -