📄 cluster.c
字号:
/* swap row j, row N'-1 */ trow = mx[Np-1]; mx[Np-1] = mx[j]; mx[j] = trow; /* swap col j, col N'-1 */ for (row = 0; row < Np; row++) { tcol = mx[row][Np-1]; mx[row][Np-1] = mx[row][j]; mx[row][j] = tcol; } /* swap coord j, coord N'-1 */ SWAP(coord[j], coord[Np-1]); } /* average i and j together; they're now at Np-2 and Np-1 though */ i = Np-2; j = Np-1; /* merge by saving avg of cols of row i and row j */ for (col = 0; col < Np; col++) { switch (mode) { case CLUSTER_MEAN: mx[i][col] =(mx[i][col]+ mx[j][col]) / 2.0; break; case CLUSTER_MIN: mx[i][col] = MIN(mx[i][col], mx[j][col]); break; case CLUSTER_MAX: mx[i][col] = MAX(mx[i][col], mx[j][col]); break; default: mx[i][col] =(mx[i][col]+ mx[j][col]) / 2.0; break; } } /* copy those rows to columns */ for (col = 0; col < Np; col++) mx[col][i] = mx[i][col]; /* store the node index in coords */ coord[Np-2] = Np+N-2; } /************************** * Garbage collection and return **************************/ Free2DArray((void **) mx, N); free(coord); free(diff); *ret_tree = tree; return 1;}/* Function: AllocPhylo() * * Purpose: Allocate space for a phylo_s array. N-1 structures * are allocated, one for each node; in each node, a 0..N * is_in flag array is also allocated and initialized to * all zeros. * * Args: N - size; number of sequences being clustered * * Return: pointer to the allocated array * */struct phylo_s *AllocPhylo(int N){ struct phylo_s *tree; int i; if ((tree = (struct phylo_s *) malloc ((N-1) * sizeof(struct phylo_s))) == NULL) return NULL; for (i = 0; i < N-1; i++) { tree[i].diff = 0.0; tree[i].lblen = tree[i].rblen = 0.0; tree[i].left = tree[i].right = tree[i].parent = -1; tree[i].incnum = 0; if ((tree[i].is_in = (char *) calloc (N, sizeof(char))) == NULL) return NULL; } return tree;}/* Function: FreePhylo() * * Purpose: Free a clustree array that was built to cluster N sequences. * * Args: tree - phylogenetic tree to free * N - size of clustree; number of sequences it clustered * * Return: (void) */voidFreePhylo(struct phylo_s *tree, int N){ int idx; for (idx = 0; idx < N-1; idx++) free(tree[idx].is_in); free(tree);}/* Function: MakeDiffMx() * * Purpose: Given a set of aligned sequences, construct * an NxN fractional difference matrix. (i.e. 1.0 is * completely different, 0.0 is exactly identical). * * Args: aseqs - flushed, aligned sequences * num - number of aseqs * ret_dmx - RETURN: difference matrix * * Return: 1 on success, 0 on failure. * Caller must free diff matrix with FMX2Free(dmx) */voidMakeDiffMx(char **aseqs, int num, float ***ret_dmx){ float **dmx; /* RETURN: distance matrix */ int i,j; /* counters over sequences */ /* Allocate 2D float matrix */ dmx = FMX2Alloc(num, num); /* Calculate distances; symmetric matrix * record difference, not identity (1 - identity) */ for (i = 0; i < num; i++) for (j = i; j < num; j++) dmx[i][j] = dmx[j][i] = 1.0 - PairwiseIdentity(aseqs[i], aseqs[j]); *ret_dmx = dmx; return;}/* Function: MakeIdentityMx() * * Purpose: Given a set of aligned sequences, construct * an NxN fractional identity matrix. (i.e. 1.0 is * completely identical, 0.0 is completely different). * Virtually identical to MakeDiffMx(). It's * less confusing to have two distinct functions, I find. * * Args: aseqs - flushed, aligned sequences * num - number of aseqs * ret_imx - RETURN: identity matrix (caller must free) * * Return: 1 on success, 0 on failure. * Caller must free imx using FMX2Free(imx) */voidMakeIdentityMx(char **aseqs, int num, float ***ret_imx){ float **imx; /* RETURN: identity matrix */ int i,j; /* counters over sequences */ /* Allocate 2D float matrix */ imx = FMX2Alloc(num, num); /* Calculate distances, symmetric matrix */ for (i = 0; i < num; i++) for (j = i; j < num; j++) imx[i][j] = imx[j][i] = PairwiseIdentity(aseqs[i], aseqs[j]); *ret_imx = imx; return;}/* Function: PrintNewHampshireTree() * * Purpose: Print out a tree in the "New Hampshire" standard * format. See PHYLIP's draw.doc for a definition of * the New Hampshire format. * * Like a CFG, we generate the format string left to * right by a preorder tree traversal. * * Args: fp - file to print to * ainfo- alignment info, including sequence names * tree - tree to print * N - number of leaves * */voidPrintNewHampshireTree(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N){ struct intstack_s *stack; int code; float *blen; int docomma; blen = (float *) MallocOrDie (sizeof(float) * (2*N-1)); stack = InitIntStack(); PushIntStack(stack, N); /* push root on stack */ docomma = FALSE; /* node index code: * 0..N-1 = leaves; indexes of sequences. * N..2N-2 = interior nodes; node-N = index of node in tree structure. * code N is the root. * 2N..3N-2 = special flags for closing interior nodes; node-2N = index in tree */ while (PopIntStack(stack, &code)) { if (code < N) /* we're a leaf. */ { /* 1) print name:branchlength */ if (docomma) fputs(",", fp); fprintf(fp, "%s:%.5f", ainfo->sqinfo[code].name, blen[code]); docomma = TRUE; } else if (code < 2*N) /* we're an interior node */ { /* 1) print a '(' */ if (docomma) fputs(",\n", fp); fputs("(", fp); /* 2) push on stack: ), rchild, lchild */ PushIntStack(stack, code+N); PushIntStack(stack, tree[code-N].right); PushIntStack(stack, tree[code-N].left); /* 3) record branch lengths */ blen[tree[code-N].right] = tree[code-N].rblen; blen[tree[code-N].left] = tree[code-N].lblen; docomma = FALSE; } else /* we're closing an interior node */ { /* print a ):branchlength */ if (code == 2*N) fprintf(fp, ");\n"); else fprintf(fp, "):%.5f", blen[code-N]); docomma = TRUE; } } FreeIntStack(stack); free(blen); return;}/* Function: PrintPhylo() * * Purpose: Debugging output of a phylogenetic tree structure. */voidPrintPhylo(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N){ int idx; for (idx = 0; idx < N-1; idx++) { fprintf(fp, "Interior node %d (code %d)\n", idx, idx+N); fprintf(fp, "\tParent: %d (code %d)\n", tree[idx].parent-N, tree[idx].parent); fprintf(fp, "\tLeft: %d (%s) %f\n", tree[idx].left < N ? tree[idx].left-N : tree[idx].left, tree[idx].left < N ? ainfo->sqinfo[tree[idx].left].name : "interior", tree[idx].lblen); fprintf(fp, "\tRight: %d (%s) %f\n", tree[idx].right < N ? tree[idx].right-N : tree[idx].right, tree[idx].right < N ? ainfo->sqinfo[tree[idx].right].name : "interior", tree[idx].rblen); fprintf(fp, "\tHeight: %f\n", tree[idx].diff); fprintf(fp, "\tIncludes:%d seqs\n", tree[idx].incnum); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -