📄 cluster-sim.c
字号:
FLOAT *nnb_sim = new_array_of(npat, FLOAT); if (item == NULL || nnb_index == NULL || nnb_sim == NULL) { fprintf(stderr, "%s: not enough core\n", program); exit(1); } /* * initialize leaf nodes */ for (i = 0; i < npat; i++) { item[i].slist = pattern[i]; item[i].root = TRUE; item[i].size = 1; item[i].leaf = i; item[i].sim = 0.0; item[i].l_tree = item[i].r_tree = NULL; } /* * initialize nearest neighbors */ for (i = 0; i < npat; i++) find_nnb(item, i, lpat, &nnb_index[i], &nnb_sim[i]); /* * cluster until done */ while (nodes > csize) { BiTree *newitem; FLOAT sim, max_sim; int pair1, pair2; /* * find minimum distance pair */ pair1 = NONE; max_sim = 0.0; for (i = 0; i < npat; i++) { if (item[i].root == FALSE) continue; if (nnb_index[i] != NONE && (pair1 == NONE || nnb_sim[i] > max_sim)) { pair1 = i; pair2 = nnb_index[i]; max_sim = nnb_sim[i]; } } if (pair1 == NONE) break; /* analysis finished *//* max_sim = root(max_sim);*/ if (dflag) { printf("max sim = %f\t(", (float)max_sim); print_names (&item[pair1], name); printf(" )\t("); print_names (&item[pair2], name); printf(" )\n"); } IfErr (newitem = new_tree(&item[pair1])) { /* copy */ fprintf(stderr, "%s: not enough core\n", program); exit(1); } /* * replace left child node with new tree node * link right child node into parent */ item[pair1].l_tree = newitem; item[pair1].r_tree = &item[pair2]; item[pair1].leaf = LEAF; /* ith item cannot be a leaf it has * subtrees */ IfErr (item[pair1].slist = new_array_of(lpat, FLOAT)) { fprintf(stderr, "%s: not enough core\n", program); exit(1); } item[pair1].size = newitem->size + item[pair2].size; item[pair1].sim = max_sim; newitem->root = FALSE; item[pair2].root = FALSE; /* jth item is no longer a root.its a * subtree */ /* update distance of the combined node */ for (k = 0; k < lpat; k++) { if (k != pair1 && item[k].root == TRUE) { item[pair1].slist[k] = CombSim(newitem->size, newitem->slist[k], item[pair2].size, item[pair2].slist[k], item[k].size, newitem->slist[pair2], UPGMA);/* WPGMA);*/ item[k].slist[pair1] = item[pair1].slist[k]; } else item[pair1].slist[k] = -1; } /* * update nearest neighbors */ for (i = 0; i < npat; i++) { if (nnb_index[i] == NONE) { continue; } else if (nnb_index[i] == pair1 || nnb_index[i] == pair2) { /* * worst case: the old nnb is the node that will disappear. * recompute nnb from scratch */ find_nnb(item, i, lpat, &nnb_index[i], &nnb_sim[i]); } else if (pair1 < i && (sim = item[pair1].slist[i]) > nnb_sim[i]) { /* * distance to new node is smaller than previous nnb, * so make it the new nnb. */ nnb_index[i] = pair1; nnb_sim[i] = sim; } } /* number of nodes have been reduced */ --nodes; } /* * search for root */tflag = 0; j = 0; for (i = 0; i < npat; i++) { if (item[i].root == TRUE) { if (tflag) { if (vflag) printf("Cluster %d Tree = \n", j); IfErr (print_tree(&item[i], name, npat, width)) { fprintf(stderr, "%s: %s: error printing tree\n", program, ERR_MSG); exit(1); } } if (gflag) { if (vflag) printf("Cluster %d Tree Graph= \n", j); graph_tree(&item[i], name, npat, bflag); } if (Tflag) {#ifdef HAVE_CURSES if (vflag) printf("Cluster %d Displaying tree with curses...\n", j); curses_tree(&item[i], name, npat, width);#else fprintf(stderr, "Sorry, no curses support available.\n");#endif } if (Bflag) { print_bits(&item[i], name, npat); } printf("\n"); ++j; } } if (csize > 0) { /* explicit size is given */ cno = 0; for (i=0; i < npat; ++i) if (item[i].root == TRUE) { printf("%d **** Cluster %d from Hierarchical Clustering ****\n", item[i].size, cno); print_k_cluster(&item[i], name, pinfo, cno); ++cno; } printf("-1 ****\n"); }}static FLOATsimilarity(pat1, pat2, lpat) FLOAT *pat1, *pat2; int lpat;{ return 0.0;}static FLOATdistance(pat1, pat2, lpat) FLOAT *pat1, *pat2; int lpat;{ register int i; FLOAT dist = 0.0; for (i = 0; i < lpat; i++) { FLOAT diff = 0.0;#ifndef NO_DONTCARES if (!IS_DC(pat1[i]) && !IS_DC(pat2[i]))#endif diff = pat1[i] - pat2[i]; switch (norm) { FLOAT adiff; case 2: dist += diff * diff; break; case 1: dist += fabs(diff); break; case 0: if ((adiff = fabs(diff)) > dist) dist = adiff; break; default: dist += pow(fabs(diff), (double) norm); break; } } return dist;}static FLOATroot(dist) FLOAT dist;{ switch (norm) { case 2: return sqrt(dist); case 1: case 0: return dist; default: return pow(dist, 1 / (double) norm); }}BiTree *new_tree(item) BiTree *item;{ BiTree *tree; IfErr (tree = new(BiTree)) return NULL; tree->r_tree = item->r_tree; tree->l_tree = item->l_tree; tree->leaf = item->leaf; tree->root = item->root; tree->size = item->size; tree->sim = item->sim; tree->slist = item->slist; return tree;}#define Blksize 128read_pattern(Pfile, patternP, lpatternP, npatternP, nameP) FILE *Pfile; /* ptrs to pattern file */ FLOAT ***patternP; int *lpatternP, *npatternP; char ***nameP;{ register int i; int status; int Asize = Blksize; /* current array size */ /***** these local variables are temporary storage and are **** ***** copied to the real variables if there is no error ******/ int lpattern, npattern; /* size and # of patterns */ FLOAT **pattern; FLOAT *first_pattern; char **name = NULL; char *first_name = NULL; FLOAT *scales = NULL; int x, y; int sim; /* get the size of similarity matrix */ fscanf(Pfile, "%d %d %d", &x, &npattern, &status); lpattern = npattern; /* allocate space for input/target arrays */ IfErr(pattern = new_2d_array_of(npattern, lpattern, FLOAT)) Erreturn("cannot allocate memory for patterns"); while (fscanf(Pfile, "%d %d %d\n", &sim, &x, &y) != EOF) { pattern[x-1][y-1] = sim; pattern[y-1][x-1] = sim; } /* if there is any error, these pointers below aren't changed */ /* free any space already allocated for patterns */ if (*patternP != NULL) free_2d_array(*patternP, *npatternP); if (*nameP != NULL) free(*nameP); *npatternP = npattern; /* # of patterns read in from file */ *lpatternP = lpattern; /* # of elements in pattern */ *patternP = pattern; /* input array */ *nameP = name; /* name array */ return MY_OK; /* patterns were read in without error */}nstrings(fp, patternP, nameP) /* counts number of strings in the first line * of file */ FILE *fp; FLOAT **patternP; char **nameP;{ register int i; int c; FLOAT *pattern; char *name = NULL; IfErr (pattern = new_array_of(1, FLOAT)) Erreturn("not enough core"); for (i = 0;; i++) { double f; if ((c = skip_blanks(fp)) == '\n') break; /* read field (number of name) */ if (c == '\"') { getc(fp); /* discard quote */ fgets(buffer, sizeof(buffer), fp); buffer[strlen(buffer) - 1] = '\0'; } else if (fscanf(fp, "%s", buffer) != 1) break;#ifndef NO_DONTCARES if (c != '\"' && strcmp(buffer, DONTCARE) == 0) pattern[i] = DC_VAL; else#endif if (c == '\"' || sscanf(buffer, "%lf", &f) != 1) { IfErr (name = new_string(buffer)) Erreturn("not enough core"); if (c != '\"') skip_blanks(fp); break; } pattern[i] = f; IfErr(pattern = change_array_size(pattern, (i + 2), FLOAT)) Erreturn("not enough core"); } if (i == 0) Erreturn("empty pattern"); *patternP = pattern; *nameP = name; return i;}/* function to define new similarity between (r, s) and k after r and s are combined newsim = a * r_k_sim + b * s_k_sim + c * r_s_sim + d * | r_k_sim - s_k_sim |*/static FLOATCombSim(r_size, r_k_sim, s_size, s_k_sim, k_size, r_s_sim, flag) FLOAT r_k_sim, s_k_sim, r_s_sim; int r_size, s_size, k_size, flag;{ FLOAT a, b, c, d, diff, newsim; int tot; switch (flag) { case SINGLE: /* check for correctness */ a = 0.5; b = 0.5; c = 0.0; d = -0.5; break; case COMPLETE: /* check for correctness */ a = 0.5; b = 0.5; c = 0.0; d = 0.5; break; case WPGMA: a = 0.5; b = 0.5; c = 0.0; d = 0.0; break; case UPGMC: tot = r_size + s_size; a = (float)r_size/tot; b = (float)s_size/tot; c = -1.0*r_size*s_size/(tot*tot); d = 0.0; break; case WPGMC: a = 0.5; b = 0.5; c = -0.25; d = 0.0; break; case WARD: tot = r_size + s_size + k_size; a = (float)(r_size+k_size)/tot; b = (float)(s_size+k_size)/tot; c = -1.0*k_size/tot; d = 0.0; break; case UPGMA: default: tot = r_size + s_size; a = (float)r_size/tot; b = (float)s_size/tot; c = 0.0; d = 0.0; break; } if (r_k_sim > s_k_sim) diff = r_k_sim - s_k_sim; else diff = s_k_sim - r_k_sim; newsim = a * r_k_sim + b * s_k_sim + c * r_s_sim + d * diff;/*printf("a : %f r_k_sim : %f\n", a, r_k_sim);printf("b : %f s_k_sim : %f\n", b, s_k_sim);printf("c : %f r_s_sim : %f\n", c, r_s_sim);printf("new sim : %f\n", newsim);*/ return newsim;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -