📄 cluster-no-noise.c
字号:
*/ } /* * cluster until done */ while (nodes > csize) { BiTree *newitem; FLOAT dist, min_dist; int pair1, pair2; /* * find minimum distance pair */ pair1 = NONE; min_dist = 0.0; for (i = 0; i < npat; i++) { if (item[i].root == FALSE || item[i].pruned == TRUE) continue; if (nnb_index[i] != NONE && (pair1 == NONE || nnb_dist[i] < min_dist)) { pair1 = i; pair2 = nnb_index[i]; min_dist = nnb_dist[i]; } } if (pair1 == NONE) break; /* analysis finished */ min_dist = root(min_dist); if (dflag) { printf("minimum distance = %f\t(", (float)min_dist); print_names (&item[pair1], name); printf(" )\t("); print_names (&item[pair2], name); printf(" )\n"); } merge(item, pair1, pair2, lpat, min_dist); /* * update nearest neighbors */ for (i = 0; i < npat; i++) { if (nnb_index[i] == NONE || item[i].root == FALSE || item[i].pruned == TRUE) { continue; } else if (nnb_index[i] == pair1 || nnb_index[i] == pair2) { /* * worst case: the old nnb is the node that will disappear. * recompute nnb from scratch */ find_nnb(item, i, lpat, &nnb_index[i], &nnb_dist[i]); } else if (pair1 < i && (dist = cure_distance(&item[pair1], &item[i], lpat)) < nnb_dist[i]) { /* * distance to new node is smaller than previous nnb, * so make it the new nnb. */ nnb_index[i] = pair1; nnb_dist[i] = dist; } } /* number of nodes have been reduced */ --nodes; /* prune clusters based on their size */ if (nodes == (int)(npat * FirstPruneRatio)) {printf("==== First phase of pruning at %d nodes remaining ====\n", nodes); for (i = 0; i < npat; i++) { if (item[i].root == TRUE && item[i].size < FirstCut) {printf("==== %d of size %d removed\n", i, item[i].size); item[i].pruned = TRUE; --nodes; } } /* recalc min dist */ for (i = 0; i < npat; i++) { if (item[i].root == TRUE && item[nnb_index[i]].pruned == TRUE) find_nnb(item, i, lpat, &nnb_index[i], &nnb_dist[i]); } } else if (nodes == csize * SecondPruneMulti) {printf("==== Second phase of pruning at %d nodes remaining ====\n", nodes); for (i = 0; i < npat; i++) { if (item[i].root == TRUE && item[i].pruned == FALSE && item[i].size < SecondCut) {printf("==== %d of size %d removed\n", i, item[i].size); item[i].pruned = TRUE; --nodes; } } /* recalc min dist */ for (i = 0; i < npat; i++) { if (item[i].root == TRUE && item[nnb_index[i]].pruned == TRUE) find_nnb(item, i, lpat, &nnb_index[i], &nnb_dist[i]); } } } /* put back noise back to the clusters */ for (i=0; i<npat; ++i) { if (item[i].root == TRUE && item[i].pruned == TRUE) { assign_noise_cluster(item, i, npat, lpat); } }#if 0 /* * search for root */ j = 0; for (i = 0; i < npat; i++) { if (item[i].root == TRUE) { if (tflag) { if (vflag) printf("Cluster %d Tree = \n", j); IfErr (print_tree(&item[i], name, npat, width)) { fprintf(stderr, "%s: %s: error printing tree\n", program, ERR_MSG); exit(1); } } if (gflag) { if (vflag) printf("Cluster %d Tree Graph= \n", j); graph_tree(&item[i], name, npat, bflag); } if (Tflag) {#ifdef HAVE_CURSES if (vflag) printf("Cluster %d Displaying tree with curses...\n", j); curses_tree(&item[i], name, npat, width);#else fprintf(stderr, "Sorry, no curses support available.\n");#endif } if (Bflag) { print_bits(&item[i], name, npat); } printf("\n"); ++j; } }#endif /* assign cluster number */ cno = 0; for (i=0; i<npat; ++i) { if (item[i].root == TRUE && item[i].pruned == FALSE) { assign_cluster(&item[i], cno, pinfo); ++cno; } } /* one more round of noise cluster assignment */ for (i=0; i<npat; ++i) { assign_noise_cluster_post(item, &item[i], npat, lpat, pinfo); }}static FLOATcure_distance(x, y, lpat) BiTree *x, *y; int lpat;{ register int i, j; FLOAT min_dist = MAXFLOAT, dist; int xmax, ymax; if (x->size > rsize) xmax = rsize; else xmax = x->size; if (y->size > rsize) ymax = rsize; else ymax = y->size; for (i=0; i<xmax; ++i) for (j=0; j<ymax; ++j) if ((dist = distance(x->rep[i], y->rep[j], lpat)) < min_dist) min_dist = dist; return min_dist;}static FLOATcure_one_distance(x, y, lpat) BiTree *x, *y; int lpat;{ register int i, j; FLOAT min_dist = MAXFLOAT, dist; int xmax, ymax; if (y->size > rsize) ymax = rsize; else ymax = y->size; for (j=0; j<ymax; ++j) if ((dist = distance(x->pat, y->rep[j], lpat)) < min_dist) min_dist = dist; return min_dist;}static FLOATdistance(pat1, pat2, lpat) FLOAT *pat1, *pat2; int lpat;{ register int i; FLOAT dist = 0.0; for (i = 0; i < lpat; i++) { FLOAT diff = 0.0;#ifndef NO_DONTCARES if (!IS_DC(pat1[i]) && !IS_DC(pat2[i]))#endif diff = pat1[i] - pat2[i]; switch (norm) { FLOAT adiff; case 2: dist += diff * diff; break; case 1: dist += fabs(diff); break; case 0: if ((adiff = fabs(diff)) > dist) dist = adiff; break; default: dist += pow(fabs(diff), (double) norm); break; } } return dist;}static FLOATroot(dist) FLOAT dist;{ switch (norm) { case 2: return sqrt(dist); case 1: case 0: return dist; default: return pow(dist, 1 / (double) norm); }}BiTree *new_tree(item) BiTree *item;{ BiTree *tree; IfErr (tree = new(BiTree)) return NULL; tree->r_tree = item->r_tree; tree->l_tree = item->l_tree; tree->leaf = item->leaf; tree->root = item->root; tree->size = item->size; tree->distance = item->distance; tree->pat = item->pat; return tree;}#define Blksize 128read_pattern(Pfile, patternP, lpatternP, npatternP, nameP) FILE *Pfile; /* ptrs to pattern file */ FLOAT ***patternP; int *lpatternP, *npatternP; char ***nameP;{ register int i; int status; int Asize = Blksize; /* current array size */ /***** these local variables are temporary storage and are **** ***** copied to the real variables if there is no error ******/ int lpattern, npattern; /* size and # of patterns */ FLOAT **pattern; FLOAT *first_pattern; char **name = NULL; char *first_name = NULL; FLOAT *scales = NULL; IfErr(lpattern = nstrings(Pfile, &first_pattern, &first_name)) return MY_ERR; /* check for scaling info */ if (first_name != NULL && strcmp(first_name, SCALE) == 0) { scales = first_pattern; free(first_name); /* read first vector again */ IfErr(status = nstrings(Pfile, &first_pattern, &first_name)) return MY_ERR; if (status != lpattern) Erreturn("scaling vector not matching data"); } /* allocate space for input/target arrays */ IfErr(pattern = new_2d_array_of(Asize, lpattern, FLOAT)) Erreturn("cannot allocate memory for patterns"); if (first_name != NULL) IfErr(name = new_array_of(Asize, char *)) Erreturn("cannot allocate memory for names"); /**** this loop reads in one line from pattern file,** **** stores each pattern into pattern buffer *****/ for (npattern = 0;; npattern++) { if (npattern >= Asize) {/* need to allocate more space for arrays */ IfErr(pattern = change_2d_array_size( pattern, Asize, lpattern, Asize + Blksize, lpattern, FLOAT)) Erreturn("cannot allocate memory for pattern "); if (name != NULL) IfErr(name = change_array_size(name, Asize + Blksize, char *)) Erreturn("cannot allocate memory for pattern "); Asize += Blksize; /* array size is now Blksize bigger */ } if (first_pattern != NULL) { /* copy data from first line */ for (i = 0; i < lpattern; i++) pattern[npattern][i] = first_pattern[i]; free(first_pattern); first_pattern = NULL; if (first_name != NULL) { name[npattern] = first_name; first_name = NULL; } } else { /* read data from file */ for (i = 0; i < lpattern; i++) { IfEOF(status = fscanf(Pfile, "%s", buffer)) { if (i == 0) break; Erreturn1("cannot read pattern # %d", npattern); }#ifndef NO_DONTCARES if (strcmp(buffer, DONTCARE) == 0) pattern[npattern][i] = DC_VAL; else#endif { double f; IfErr (status = sscanf(buffer, "%lf", &f)) Erreturn1("cannot read pattern # %d", npattern); pattern[npattern][i] = f; } } IfEOF(status) break; if (name != NULL) { char c = skip_blanks(Pfile); if (c == '\"') { getc(Pfile); fgets(buffer, sizeof(buffer), Pfile); buffer[strlen(buffer) - 1] = '\0'; } else { IfEOF(status = fscanf(Pfile, "%s", buffer)) Erreturn("not enough names"); skip_blanks(Pfile); } IfErr(name[npattern] = new_string(buffer)) Erreturn("not enough core"); } IfEOF(status) break; } if (!sflag && scales != NULL) /* scale data if requested */ for (i = 0; i < lpattern; i++) pattern[npattern][i] *= scales[i]; } /* if there is any error, these pointers below aren't changed */ /* free any space already allocated for patterns */ if (*patternP != NULL) free_2d_array(*patternP, *npatternP); if (*nameP != NULL) free(*nameP); *npatternP = npattern; /* # of patterns read in from file */ *lpatternP = lpattern; /* # of elements in pattern */ *patternP = pattern; /* input array */ *nameP = name; /* name array */ return MY_OK; /* patterns were read in without error */}nstrings(fp, patternP, nameP) /* counts number of strings in the first line * of file */ FILE *fp; FLOAT **patternP; char **nameP;{ register int i; int c; FLOAT *pattern; char *name = NULL; IfErr (pattern = new_array_of(1, FLOAT)) Erreturn("not enough core"); for (i = 0;; i++) { double f; if ((c = skip_blanks(fp)) == '\n') break; /* read field (number of name) */ if (c == '\"') { getc(fp); /* discard quote */ fgets(buffer, sizeof(buffer), fp); buffer[strlen(buffer) - 1] = '\0'; } else if (fscanf(fp, "%s", buffer) != 1) break;#ifndef NO_DONTCARES if (c != '\"' && strcmp(buffer, DONTCARE) == 0) pattern[i] = DC_VAL; else#endif if (c == '\"' || sscanf(buffer, "%lf", &f) != 1) { IfErr (name = new_string(buffer)) Erreturn("not enough core"); if (c != '\"') skip_blanks(fp); break; } pattern[i] = f; IfErr(pattern = change_array_size(pattern, (i + 2), FLOAT)) Erreturn("not enough core"); } if (i == 0) Erreturn("empty pattern"); *patternP = pattern; *nameP = name; return i;}voidmerge(item, pair1, pair2, lpat, min_dist)BiTree *item;int pair1;int pair2;int lpat;float min_dist;{ register int i, j, k; BiTree *newitem; int newsize = item[pair1].size+item[pair2].size; FLOAT r1 = (float)item[pair1].size/newsize; FLOAT r2 = (float)item[pair2].size/newsize; FLOAT maxDist, *repPoint; IfErr (newitem = new_tree(&item[pair1])) { /* copy */ fprintf(stderr, "%s: not enough core\n", program); exit(1); } /* * replace left child node with new tree node * link right child node into parent */ item[pair1].l_tree = newitem; item[pair1].r_tree = &item[pair2]; item[pair1].leaf = LEAF; /* ith item cannot be a leaf it has * subtrees */ /* set the size and clear up some memory */ item[pair1].size = newsize; item[pair1].distance = min_dist; newitem->root = FALSE; item[pair2].root = FALSE; /* jth item is no longer a root.its a * subtree */ for (i=0; (i<rsize && i<item[pair2].size); ++i) free(item[pair2].rep[i]); free(item[pair2].rep); /* find mean of two clusters as weighted average*/ for (i=0; i<lpat; ++i) item[pair1].mean[i] = r1*item[pair1].mean[i] + r2*item[pair2].mean[i]; free(item[pair2].mean);/*printf("merge mean : %f %f\n", item[pair1].mean[0], item[pair1].mean[1]);*/ /* find new representative points */ for (i=0; (i < rsize && i < newsize); ++i) { maxDist = 0; repInTree(&item[pair1], i, item[pair1].mean, item[pair1].rep, lpat, &maxDist, &repPoint); memcpy(item[pair1].rep[i], repPoint, lpat*sizeof(FLOAT));/*printf("rep %d before : %f %f\n", i, item[pair1].rep[i][0], item[pair1].rep[i][1]);*/ } /* shrink based on alpha */ for (j=0; j<i; ++j) for (k=0; k<lpat; ++k) item[pair1].rep[j][k] += (alpha*(item[pair1].mean[k]- item[pair1].rep[j][k]));}voidrepInTree(item, rcount, mean, rep, lpat, maxDist, repPoint)BiTree *item;int rcount;FLOAT *mean;FLOAT **rep;int lpat;FLOAT *maxDist;FLOAT **repPoint;{ register int i; FLOAT minDist, dist; if (item->leaf != LEAF) { if (rcount == 0) minDist = distance(item->pat, mean, lpat); else { minDist = MAXFLOAT; for (i=0; i<rcount; ++i) if ((dist = distance(item->pat, rep[i], lpat)) < minDist) minDist = dist; } if (minDist >= *maxDist) { *maxDist = minDist; *repPoint = item->pat; } } else { repInTree(item->l_tree, rcount, mean, rep, lpat, maxDist, repPoint); repInTree(item->r_tree, rcount, mean, rep, lpat, maxDist, repPoint); }}voidprint_rep_points(item, cno, lpat) BiTree *item; int cno; int lpat;{ register int i, j; printf("**** Cluster %d from CURE Rep Points ****\n", cno); for (i=0; i<item->size && i<rsize; ++i) { for (j=0; j<lpat; ++j) printf("%f ", item->rep[i][j]); printf("\n"); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -