⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cluster-no-noise.c

📁 经典cure聚类算法,实现用c++语言.大家
💻 C
📖 第 1 页 / 共 2 页
字号:
*/    }    /*     * cluster until done     */    while (nodes > csize) {	BiTree  *newitem;	FLOAT   dist, min_dist;	int     pair1, pair2;		/*	 * find minimum distance pair	 */	pair1 = NONE;	min_dist = 0.0;	for (i = 0; i < npat; i++) {	    if (item[i].root == FALSE || item[i].pruned == TRUE)		continue;	    if (nnb_index[i] != NONE &&		(pair1 == NONE || nnb_dist[i] < min_dist)) {		pair1 = i;		pair2 = nnb_index[i];		min_dist = nnb_dist[i];	    }	}	if (pair1 == NONE)	    break;		/* analysis finished */	min_dist = root(min_dist);	if (dflag) {	    printf("minimum distance = %f\t(", (float)min_dist);	    print_names (&item[pair1], name);	    printf(" )\t(");	    print_names (&item[pair2], name);	    printf(" )\n");	}        merge(item, pair1, pair2, lpat, min_dist);	/*	 * update nearest neighbors	 */	for (i = 0; i < npat; i++) {	    if (nnb_index[i] == NONE || item[i].root == FALSE || 	        item[i].pruned == TRUE) {		continue;	    } else if (nnb_index[i] == pair1 || nnb_index[i] == pair2) {		/*		 * worst case: the old nnb is the node that will disappear.		 * recompute nnb from scratch		 */		find_nnb(item, i, lpat, &nnb_index[i], &nnb_dist[i]);	    } else if (pair1 < i &&		       (dist = cure_distance(&item[pair1], &item[i], lpat))			    < nnb_dist[i]) {		/*		 * distance to new node is smaller than previous nnb,		 * so make it the new nnb.		 */		nnb_index[i] = pair1;		nnb_dist[i] = dist;	    }	}	/* number of nodes have been reduced */	--nodes;	/* prune clusters based on their size */	if (nodes == (int)(npat * FirstPruneRatio)) {printf("==== First phase of pruning at %d nodes remaining ====\n", nodes);	    for (i = 0; i < npat; i++) {		if (item[i].root == TRUE && item[i].size < FirstCut) {printf("====      %d of size %d removed\n", i, item[i].size);			item[i].pruned = TRUE;			--nodes;		}	    }	    /* recalc min dist */	    for (i = 0; i < npat; i++) {		if (item[i].root == TRUE && item[nnb_index[i]].pruned == TRUE)			find_nnb(item, i, lpat, &nnb_index[i], &nnb_dist[i]);	    }	}	else if (nodes == csize * SecondPruneMulti) {printf("==== Second phase of pruning at %d nodes remaining ====\n", nodes);	    for (i = 0; i < npat; i++) {		if (item[i].root == TRUE && item[i].pruned == FALSE &&		    item[i].size < SecondCut) {printf("====      %d of size %d removed\n", i, item[i].size);			item[i].pruned = TRUE;			--nodes;		}	    }	    /* recalc min dist */	    for (i = 0; i < npat; i++) {		if (item[i].root == TRUE && item[nnb_index[i]].pruned == TRUE)			find_nnb(item, i, lpat, &nnb_index[i], &nnb_dist[i]);	    }	}    }    /* put back noise back to the clusters */    for (i=0; i<npat; ++i) {	if (item[i].root == TRUE && item[i].pruned == TRUE) {		assign_noise_cluster(item, i, npat, lpat);	}    }#if 0    /*     * search for root     */    j = 0;    for (i = 0; i < npat; i++) {      if (item[i].root == TRUE) {        if (tflag) {	  if (vflag)	    printf("Cluster %d Tree = \n", j);	   IfErr (print_tree(&item[i], name, npat, width)) {	    fprintf(stderr, "%s: %s: error printing tree\n", program, ERR_MSG);	    exit(1);	  }        }        if (gflag) {	  if (vflag)	    printf("Cluster %d Tree Graph= \n", j);	  graph_tree(&item[i], name, npat, bflag);        }        if (Tflag) {#ifdef HAVE_CURSES	  if (vflag)	    printf("Cluster %d Displaying tree with curses...\n", j);	  curses_tree(&item[i], name, npat, width);#else	  fprintf(stderr, "Sorry, no curses support available.\n");#endif        }        if (Bflag) {	    print_bits(&item[i], name, npat);        }	printf("\n");	++j;      }    }#endif    /* assign cluster number */    cno = 0;    for (i=0; i<npat; ++i) {	if (item[i].root == TRUE && item[i].pruned == FALSE) {		assign_cluster(&item[i], cno, pinfo);		++cno;	}    }    /* one more round of noise cluster assignment */    for (i=0; i<npat; ++i) {	assign_noise_cluster_post(item, &item[i], npat, lpat, pinfo);    }}static FLOATcure_distance(x, y, lpat)    BiTree *x, *y;    int     lpat;{    register int i, j;    FLOAT min_dist = MAXFLOAT, dist;    int xmax, ymax;    if (x->size > rsize)	xmax = rsize;    else	xmax = x->size;    if (y->size > rsize)	ymax = rsize;    else	ymax = y->size;    for (i=0; i<xmax; ++i) 	for (j=0; j<ymax; ++j)		if ((dist = distance(x->rep[i], y->rep[j], lpat)) < min_dist)			min_dist = dist;    return min_dist;}static FLOATcure_one_distance(x, y, lpat)    BiTree *x, *y;    int     lpat;{    register int i, j;    FLOAT min_dist = MAXFLOAT, dist;    int xmax, ymax;    if (y->size > rsize)        ymax = rsize;    else        ymax = y->size;    for (j=0; j<ymax; ++j)        if ((dist = distance(x->pat, y->rep[j], lpat)) < min_dist)                  min_dist = dist;    return min_dist;}static FLOATdistance(pat1, pat2, lpat)    FLOAT  *pat1, *pat2;    int     lpat;{    register int i;    FLOAT   dist = 0.0;    for (i = 0; i < lpat; i++) {	FLOAT   diff = 0.0;#ifndef NO_DONTCARES	if (!IS_DC(pat1[i]) && !IS_DC(pat2[i]))#endif	    diff = pat1[i] - pat2[i];	switch (norm) {	    FLOAT   adiff;	case 2:	    dist += diff * diff;	    break;	case 1:	    dist += fabs(diff);	    break;	case 0:	    if ((adiff = fabs(diff)) > dist)		dist = adiff;	    break;	default:	    dist += pow(fabs(diff), (double) norm);	    break;	}    }    return dist;}static FLOATroot(dist)    FLOAT   dist;{    switch (norm) {    case 2:	return sqrt(dist);    case 1:    case 0:	return dist;    default:	return pow(dist, 1 / (double) norm);    }}BiTree *new_tree(item)    BiTree *item;{    BiTree *tree;    IfErr (tree = new(BiTree))	return NULL;    tree->r_tree = item->r_tree;    tree->l_tree = item->l_tree;    tree->leaf = item->leaf;    tree->root = item->root;    tree->size = item->size;    tree->distance = item->distance;    tree->pat = item->pat;    return tree;}#define Blksize 128read_pattern(Pfile, patternP, lpatternP, npatternP, nameP)    FILE   *Pfile;		/* ptrs to pattern file */    FLOAT ***patternP;    int    *lpatternP, *npatternP;    char ***nameP;{    register int i;    int     status;    int     Asize = Blksize;	/* current array size */    /***** these local variables are temporary storage and are ****     ***** copied to the real variables if there is no error ******/    int     lpattern, npattern;	/* size and # of patterns */    FLOAT **pattern;    FLOAT  *first_pattern;    char  **name = NULL;    char   *first_name = NULL;    FLOAT  *scales = NULL;    IfErr(lpattern = nstrings(Pfile, &first_pattern, &first_name))	return MY_ERR;    /* check for scaling info */    if (first_name != NULL && strcmp(first_name, SCALE) == 0) {	scales = first_pattern;	free(first_name);	/* read first vector again */	IfErr(status = nstrings(Pfile, &first_pattern, &first_name))	    return MY_ERR;	if (status != lpattern)	    Erreturn("scaling vector not matching data");    }    /* allocate space for input/target arrays */    IfErr(pattern = new_2d_array_of(Asize, lpattern, FLOAT))	Erreturn("cannot allocate memory for patterns");    if (first_name != NULL)	IfErr(name = new_array_of(Asize, char *))	    Erreturn("cannot allocate memory for names");    /**** this loop reads in one line from pattern file,**     **** stores each pattern into pattern buffer    *****/    for (npattern = 0;; npattern++) {	if (npattern >= Asize) {/* need to allocate more space for arrays */	    IfErr(pattern = change_2d_array_size(		pattern, Asize, lpattern, Asize + Blksize, lpattern, FLOAT))		Erreturn("cannot allocate memory for pattern ");	    if (name != NULL)		IfErr(name = change_array_size(name, Asize + Blksize, char *))		    Erreturn("cannot allocate memory for pattern ");	    Asize += Blksize;	/* array size is now Blksize bigger */	}	if (first_pattern != NULL) {	/* copy data from first line */	    for (i = 0; i < lpattern; i++)		pattern[npattern][i] = first_pattern[i];	    free(first_pattern);	    first_pattern = NULL;	    if (first_name != NULL) {		name[npattern] = first_name;		first_name = NULL;	    }	}	else {			/* read data from file */	    for (i = 0; i < lpattern; i++) {		IfEOF(status = fscanf(Pfile, "%s", buffer)) {		    if (i == 0)			break;		    Erreturn1("cannot read pattern # %d", npattern);		}#ifndef NO_DONTCARES		if (strcmp(buffer, DONTCARE) == 0)		    pattern[npattern][i] = DC_VAL;		else#endif		{		    double f;		    IfErr (status = sscanf(buffer, "%lf", &f))			Erreturn1("cannot read pattern # %d", npattern);		    pattern[npattern][i] = f;		}	    }	    IfEOF(status) break;	    if (name != NULL) {		char    c = skip_blanks(Pfile);		if (c == '\"') {		    getc(Pfile);		    fgets(buffer, sizeof(buffer), Pfile);		    buffer[strlen(buffer) - 1] = '\0';		}		else {		    IfEOF(status = fscanf(Pfile, "%s", buffer))			Erreturn("not enough names");		    skip_blanks(Pfile);		}		IfErr(name[npattern] = new_string(buffer))		    Erreturn("not enough core");	    }	    IfEOF(status) break;	}	if (!sflag && scales != NULL)	/* scale data if requested */	    for (i = 0; i < lpattern; i++)		pattern[npattern][i] *= scales[i];    }    /* if there is any error, these pointers below aren't changed */    /* free any space already allocated for patterns */    if (*patternP != NULL)	free_2d_array(*patternP, *npatternP);    if (*nameP != NULL)	free(*nameP);    *npatternP = npattern;	/* # of patterns read in from file */    *lpatternP = lpattern;	/* # of elements in pattern */    *patternP = pattern;	/* input array */    *nameP = name;		/* name array */    return MY_OK;		/* patterns were read in without error */}nstrings(fp, patternP, nameP)	/* counts number of strings in the first line				 * of file */    FILE   *fp;    FLOAT **patternP;    char  **nameP;{    register int i;    int     c;    FLOAT  *pattern;    char   *name = NULL;    IfErr (pattern = new_array_of(1, FLOAT))	Erreturn("not enough core");    for (i = 0;; i++) {	double f;	if ((c = skip_blanks(fp)) == '\n')	    break;	/* read field (number of name) */	if (c == '\"') {	    getc(fp);		/* discard quote */	    fgets(buffer, sizeof(buffer), fp);	    buffer[strlen(buffer) - 1] = '\0';	}	else if (fscanf(fp, "%s", buffer) != 1)	    break;#ifndef NO_DONTCARES	if (c != '\"' && strcmp(buffer, DONTCARE) == 0)	    pattern[i] = DC_VAL;	else#endif	if (c == '\"' || sscanf(buffer, "%lf", &f) != 1) {	    IfErr (name = new_string(buffer))		Erreturn("not enough core");	    if (c != '\"')		skip_blanks(fp);	    break;	}	pattern[i] = f;	IfErr(pattern = change_array_size(pattern, (i + 2), FLOAT))	    Erreturn("not enough core");    }    if (i == 0)	Erreturn("empty pattern");    *patternP = pattern;    *nameP = name;    return i;}voidmerge(item, pair1, pair2, lpat, min_dist)BiTree *item;int pair1;int pair2;int lpat;float min_dist;{	register int i, j, k;	BiTree *newitem;	int newsize = item[pair1].size+item[pair2].size;	FLOAT r1 = (float)item[pair1].size/newsize;	FLOAT r2 = (float)item[pair2].size/newsize;	FLOAT maxDist, *repPoint;	IfErr (newitem = new_tree(&item[pair1])) { /* copy */	    fprintf(stderr, "%s: not enough core\n", program);	    exit(1);	}	/*	 * replace left child node with new tree node	 * link right child node into parent	 */	item[pair1].l_tree = newitem;	item[pair1].r_tree = &item[pair2];	item[pair1].leaf = LEAF;	/* ith item cannot be a leaf it has					 * subtrees */	/* set the size and clear up some memory */	item[pair1].size = newsize;	item[pair1].distance = min_dist;	newitem->root = FALSE;	item[pair2].root = FALSE;	/* jth item is no longer a root.its a					 * subtree */	for (i=0; (i<rsize && i<item[pair2].size); ++i)		free(item[pair2].rep[i]);	free(item[pair2].rep);	/* find mean of two clusters as weighted average*/	for (i=0; i<lpat; ++i)		item[pair1].mean[i] = r1*item[pair1].mean[i] + r2*item[pair2].mean[i];	free(item[pair2].mean);/*printf("merge mean : %f %f\n", item[pair1].mean[0], item[pair1].mean[1]);*/	/* find new representative points */	for (i=0; (i < rsize && i < newsize); ++i) {		maxDist = 0;		repInTree(&item[pair1], i, item[pair1].mean, item[pair1].rep, 			  lpat, &maxDist, &repPoint);		memcpy(item[pair1].rep[i], repPoint, lpat*sizeof(FLOAT));/*printf("rep %d before : %f %f\n", i, item[pair1].rep[i][0], item[pair1].rep[i][1]);*/	}	/* shrink based on alpha */	for (j=0; j<i; ++j)		for (k=0; k<lpat; ++k)			item[pair1].rep[j][k] += (alpha*(item[pair1].mean[k]-					                 item[pair1].rep[j][k]));}voidrepInTree(item, rcount, mean, rep, lpat, maxDist, repPoint)BiTree *item;int rcount;FLOAT *mean;FLOAT **rep;int lpat;FLOAT *maxDist;FLOAT **repPoint;{    register int i;    FLOAT minDist, dist;    if (item->leaf != LEAF) {	if (rcount == 0)		minDist = distance(item->pat, mean, lpat);	else {		minDist = MAXFLOAT;		for (i=0; i<rcount; ++i) 			if ((dist = distance(item->pat, rep[i], lpat)) < minDist)				minDist = dist;	}	if (minDist >= *maxDist) {		*maxDist = minDist;		*repPoint = item->pat;	}    }    else {		repInTree(item->l_tree, rcount, mean, rep, lpat, maxDist, 			  repPoint);		repInTree(item->r_tree, rcount, mean, rep, lpat, maxDist, 			  repPoint);    }}voidprint_rep_points(item, cno, lpat)    BiTree *item;    int    cno;    int    lpat;{    register int i, j;    printf("**** Cluster %d from CURE Rep Points ****\n", cno);    for (i=0; i<item->size && i<rsize; ++i) {	for (j=0; j<lpat; ++j)		printf("%f ", item->rep[i][j]);	printf("\n");    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -