📄 bz_chain.c
字号:
#include "util.h"#include "seq.h"#include "bz_all.h"#ifndef __lintstatic const char rcsid[] = "$Id: bz_chain.c,v 1.1 2001/04/18 19:12:42 webb Exp $";#endif/* The entry point is** msp_make_chain(msp_table_t *mt, int Gap1, int Gap2, int Scale, connect_fn)** A highest_scoring chain of MSPs is determined, where each MSP in the chain* (other than the last), begins strictly before the start of the next MSP in* the chain, and a chain's score is Scale times the sum of MSP scores minus* the sum of* connect(MSP_i, MSP_(i+1), Scale)* (the last sum is taken over all MSPs in the chain except the last). An MSP's* cum_score value is set to 1 if the MSP is in the chain, and to 0 otherwise.* Parameters Gap1 and Gap2 permit msp_make_chain to deduce useful* inequalities about chain scores. Namely, let MSP_i and MSP_j be MSPs on* diagonals diag_i and diag_j, and set diff = diag_j - diag_i.* Then Gap1 and Gap2 are required to satisfy:* if diff >= 0, then connect(MSP_i,MSP_j) >= diff*Gap1,* while* if diff < 0, then connect(MSP_i,MSP_j) >= -diff*Gap2.* In effect, Scale permits integer arithmetic to be used with very small gap* penalties, since the computed chain also maximizes the sum of the MSP costs* minus the sum of the connect(MSP_i, MSP_(i+1), Scale)/Scale.*/typedef double big_score_t;#define px(i,j,G) \ ((j == 0) ? \ (G->start[G->perm[i]].pos1 - G->start[G->perm[i]].pos2): \ (G->start[G->perm[i]].pos2))enum { BUCKET_SIZE=3 };typedef struct { int Gap1, Gap2, Scale; /* chain penalties */ big_score_t *total_score; int *perm; int *rev_perm; msp_t *start, *query; int X, Y, Diag; /* target point */ connect_t connect;} gp_t;typedef struct best_predecessor { int num; big_score_t contrib;} bp_t;typedef struct kdnode { int bucket; int cutval; big_score_t max_score; struct kdnode *loson, *hison; int lopt, hipt;} Kdnode, *Kdptr;static bp_t bp_cons(int n, big_score_t c){ bp_t bp; bp.num = n; bp.contrib = c; return bp;}/* --------------------------- build the K-d tree --------------------------- */#define SWAP(p,q) \ do {int t; t = G->perm[p]; G->perm[p] = G->perm[q]; G->perm[q] = t;} \ /*CONSTCOND*/ while(0)static int partition(int L, int U, int cutdim, const gp_t *const G){ int m, a, b, c, v, i, j; if (U - L < 2) fatal("partition: cannot happen"); m = (L+U)/2; v = a = px(L, cutdim, G); b = px(m, cutdim, G); c = px(U, cutdim, G); /* find the median of three entries; move to the front */ if ((a <= b && b <= c) || (c <= b && b <= a)) { SWAP(L,m); v = b; } else if ((a <= c && c <= b) || (b <= c && c <= a)) { SWAP(L,U); v = c; } /* move smaller entries to front, larger to rear */ i = L; j = U+1; while (i < j) { /* search forward for a large entry */ for (++i; i <= U && px(i,cutdim,G) <= v; ++i) ; /* search backward for a small entry */ for (--j; j >= L && px(j,cutdim,G) > v; --j) ; SWAP(i,j); } SWAP(i,j); /* reverse the last swap */ SWAP(L,j); /* move the "pivot" value to the proper location */ /* warning: if we return j = U, build() will recurse forever */ if (j < U) return j; if (U-L == 2) return U-1; return partition(L, U-1, cutdim, G);}static Kdptr build(int L, int U, int toggle, const gp_t *const G){ Kdptr p; int m; p = ckallocz(sizeof(Kdnode)); p->max_score = 0; if (U-L+1 <= BUCKET_SIZE) { p->bucket = 1; p->lopt = L; p->hipt = U; } else { p->bucket = 0; m = partition(L, U, toggle, G); p->cutval = px(m, toggle, G); p->hipt = m; p->loson = build(L, m, 1-toggle, G); p->hison = build(m+1, U, 1-toggle, G); } assert(p); assert(p->bucket || (p->loson && p->hison)); return p;}/* -------------------- find a best predecessor of an MSP ------------------- */static bp_t best_pred(Kdptr Q, int lowbd_dist, int toggle, bp_t bp, const gp_t *const G){ int i; big_score_t contrib; assert(Q); if (Q->max_score - lowbd_dist <= bp.contrib) return bp; assert(Q->bucket || (Q->loson && Q->hison)); if (Q->bucket) for (i = Q->lopt; i <= Q->hipt; ++i) { int j = G->perm[i]; msp_t *s = G->start+j; if (s->pos1 >= G->X || s->pos2 >= G->Y) continue; contrib = G->total_score[j] - G->connect(s, G->query, G->Scale); if (contrib > bp.contrib) { bp.contrib = contrib; bp.num = j; } } else if (toggle == 0) { big_score_t diff = G->Diag - Q->cutval; if (diff >= 0) { bp = best_pred(Q->hison, lowbd_dist, 1-toggle, bp, G); bp = best_pred(Q->loson, diff*G->Gap1, 1-toggle, bp, G); } else { bp = best_pred(Q->loson, lowbd_dist, 1-toggle, bp, G); bp = best_pred(Q->hison, -diff*G->Gap2, 1-toggle, bp, G); } } else { if (G->Y >= Q->cutval) bp = best_pred(Q->hison, lowbd_dist, 1-toggle, bp, G); bp = best_pred(Q->loson, lowbd_dist, 1-toggle, bp, G); } return bp;}static void update_max_score(Kdptr Q, big_score_t score, int pos){ if (Q != NULL) { Q->max_score = MAX(Q->max_score, score); if (Q->hipt >= pos) update_max_score(Q->loson, score, pos); else update_max_score(Q->hison, score, pos); }}/* ------------------------------ Entry Point ------------------------------ */intmsp_make_chain(msp_table_t *mt, int gap1, int gap2, int scale, connect_t con){ msp_t *p; big_score_t best; int best_end; int i, n, *chain; Kdptr root; gp_t G; bp_t bp; if (mt == 0) return 0; msp_sort_pos1(mt); n = MSP_TAB_NUM(mt); if (n == 0) return 0; G.connect = con; G.start = MSP_TAB_FIRST(mt); G.perm = ckalloc(sizeof(int)*n); G.rev_perm = ckalloc(sizeof(int)*n); G.total_score = ckalloc(sizeof(big_score_t)*n); G.Gap1 = gap1; G.Gap2 = gap2; G.Scale = scale; for (i = 0; i < n; ++i) G.perm[i] = i; root = build(0, n-1, 1, &G); for (i = 0; i < n; ++i) G.rev_perm[G.perm[i]] = i; for (i = 0; i < n; ++i) G.total_score[i] = 0; chain = ckalloc(n*sizeof(int)); best_end = -1; best = 0; for (i = 0; i < n; i++) { G.query = G.start+i; G.X = G.query->pos1; G.Y = G.query->pos2; G.Diag = G.X - G.Y; bp = best_pred(root, 0, 1, bp_cons(-1, (big_score_t) 0), &G); G.total_score[i] = G.query->score*G.Scale + bp.contrib; if (G.total_score[i] > best) { best = G.total_score[i]; best_end = i; } chain[i] = bp.num; update_max_score(root, G.total_score[i], G.rev_perm[i]); } for (p = MSP_TAB_FIRST(mt); MSP_TAB_MORE(mt,p); p = MSP_TAB_NEXT(p)) p->cum_score = 0; for (i = best_end; i != -1; i = chain[i]) (G.start+i)->cum_score = 1; /* some day we need to clean up this cum_score business */ ZFREE(G.perm); ZFREE(G.rev_perm); ZFREE(G.total_score); ZFREE(chain); return best;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -