📄 bz_align.c
字号:
static const char rcsid[]="$Id: bz_align.c,v 1.8 2001/08/26 21:16:52 schwartz Exp $";/* bz_align.c -- The only entry point is:** void Align(AlignIOPtr align_IO)** The X-drop variant of dynamic programming is applied to create a gapped* alignment by extending in both directions from a "seed point".** Procedures in this file are not allowed to modify any of the data structures* controlled by bz_extend.c, namely the Msp structures and the alignments and* blocks that they point to.*//* WHEN ADDING TO THE BLIST, CAN WE PRUNE ALIGNMENTS OUTSIDE OF [L,R]? */#include "bz_all.h"#define MININT -9999999#define TRUE 1#define FALSE 0#define Boolean int/* Dynamic Programming structure. */typedef struct DP { int DD, CC, FF; /* FF is unused, but it adds speed under linux */ int mask; /* mask out previously used points */ } *dp_ptr, dp_node;typedef struct dyn_prog { dp_node *p; unsigned int len;} dyn_prog;/* List of blocks maintained herein. */typedef struct _list { block_ptr block; int x, /* column position where block intersects the sweep row */ last_row; char type, filter; struct _list *next;} blist_node, *blist_ptr;/* The trace-back information, i.e., to trace backwards along an optimal path,* is packed into one byte per dynamic-programming grid point. At each grid* point there are three nodes: C which is entered by one diagonal edge, D* which is entered by two vertical edges (from a D node and a C node),* and I which is entered by two horizontal edges. If this is new to you,* please consult Myers and Miller, Bull. Math. Biol. 51 (1989), pp. 5-37.** The right-most two bits hold FLAG_C or FLAG_I or FLAG_D, telling which* node gets the maximum score from an entering edge. FLAG_DD (third bit* from the right) is 1 iff it is better to take a vertical edge from the D* node than a vertical edge from the C node. FLAG_II does the same for* horizontal edges.*/#define FLAG_C (0)#define FLAG_I (1<<0)#define FLAG_D (1<<1)#define FLAG_DD (1<<2)#define FLAG_II (1<<3)#define SELECT_CID (FLAG_I | FLAG_D | FLAG_C)#define PRUNE \{\ c = dp->CC + wa[*(b++)];\ if (Lcol == col) Lcol++;\ else {i = dq->DD = dq->CC = MININT; dq++;}\ dp++; *(tbp++) = 0;\ continue;\}/* ------------- allocate the dynamic-programming "sweep row" array --------- *//* ensure that the first n elements exist and have been initialized */static void dp_ready(dyn_prog *dp, int n){ int old_len, added; if (dp == NULL) fatal("dp_ready called with NULL pointer"); n = MAX(n,0); /* make sure that there is room for n of them */ if (dp->p) { old_len = dp->len; if (n > old_len) { dp->len = dp->len/16 + n + 1000; dp->p = ckrealloc(dp->p, dp->len * sizeof(dp_node)); } } else { old_len = 0; dp->len = n + 1000; dp->p = (dp_node *) ckalloc(dp->len * sizeof(dp_node)); } /* 0..old_len-1 are already in use, but zero the rest */ if ((added = dp->len - old_len) > 0) memset(dp->p + old_len, 0, sizeof(dp->p[0])*added); /* {int i; for (i = old_len; i < dp->len; ++i) dp->p[i].mask = 0;} */}/* --------------------------- Data Structures ------------------------ *//* As the dynamic-programming "sweep row" advances, we need to update the* following information:** 1. The alignment and the specific block that constrain the feasible region* below (down_align and down_block) and above (up_align and up_block), if they* exist. These are initially given for the alignment's seed-point, and* information passed to this file allows efficient updating. The column* indices just inside where the two alignments intersect the current row* are called L and R.** 2. The blocks (gap-free segments of earlier alignments) that intersect* the sweep row within the feasible region. These are keep in a list* (blist) that supports insertion and deletion when the sweep row reaches* an end of an alignment.*//* --------------------- procedures to update L and R ----------------------- *//* next_b -- move to the next down_block or up_block in a forward sweep;* return its column position */static int next_b(block_ptr *bp, Msp_ptr *mp, int is_up, int row, int seed1, int seed2){ *bp = (*bp)->next_block; if (*bp) { if ((*bp)->type == HOR && (*bp = (*bp)->next_block) == NULL) fatal("Last alignment block was horizontal"); return (*bp)->b2 - seed2; } /* we've run off the end of an alignment; move to the next one */ if (is_up) { *bp = (*mp)->up_block2; *mp = (*mp)->up_align2; } else { *bp = (*mp)->down_block2; *mp = (*mp)->down_align2; } /* if indeed there was a next one .. */ if (*bp) { if ((*bp)->type == DIA) return (*bp)->b2 + row + seed1 - (*bp)->b1 - seed2; return (*bp)->b2 - seed2; /* jumped to a vertical block */ } return 0; /* no constraint; there was no "next alignment" */}/* prev_b -- move to the previous down_block or up_block in a reverse sweep;* return its column position */static int prev_b(block_ptr *bp, Msp_ptr *mp, int is_up, int row, int seed1, int seed2){ *bp = (*bp)->prev_block; if (*bp) { if ((*bp)->type == HOR && (*bp = (*bp)->prev_block) == NULL) fatal("First alignment block was horizontal"); return seed2 - (*bp)->e2; } /* we've run off the front of an alignment; move to the previous one */ if (is_up) { *bp = (*mp)->up_block1; *mp = (*mp)->up_align1; } else { *bp = (*mp)->down_block1; *mp = (*mp)->down_align1; } /* if indeed there was a previous one .. */ if (*bp) { if ((*bp)->type == DIA) return seed2 - ((*bp)->e2 + seed1 - row - (*bp)->e1); return seed2 - (*bp)->e2; /* jumped to a vertical block */ } return 0; /* no constraint; there was no "previous alignment" */}/* update_LR -- also update Lc and Rc, the actual column limits for* dynamic programming, which may lie strictly inside L and R because of the* X-drop constraint. */static void update_LR(block_ptr *Up_block, block_ptr *Down_block, int row, int *pL, int *pR, int *pLc, int *pRc, int seed1, int seed2, Boolean reversed, Msp_ptr *Up_align, Msp_ptr *Down_align){ int L = *pL, R = *pR, Lc = *pLc, Rc = *pRc; if (!reversed) { if (*Down_block) { if ((*Down_block)->e1 >= row + seed1) { if ((*Down_block)->type == DIA) L++; } else L = next_b(Down_block, Down_align, 0, row, seed1, seed2) + 1; } if (*Down_block) Lc = MAX(Lc, L); if (*Up_block) { if ((*Up_block)->e1 >= row + seed1) { if ((*Up_block)->type == DIA) R++; } else R = next_b(Up_block, Up_align, 1, row, seed1, seed2) - 1; } if (*Up_block) Rc = MIN(Rc, R); } else { /* reversed */ if (*Up_block) { if ((*Up_block)->b1 <= seed1 - row) { if ((*Up_block)->type == DIA) L++; } else L = prev_b(Up_block, Up_align, 1, row, seed1, seed2) + 1; } if (*Up_block) Lc = MAX(Lc, L); if (*Down_block) { if ((*Down_block)->b1 <= seed1 - row) { if ((*Down_block)->type == DIA) R++; } else R = prev_b(Down_block, Down_align, 0, row, seed1, seed2) - 1; } if (*Down_block) Rc = MIN(Rc, R); } *pL = L; *pR = R; *pLc = Lc; *pRc = Rc;}/* ------------------- procedures to update the blist ----------------------- *//* build_lp -- set the p and e fields of a blist entry and mask the* corresponding positions in the dynamic programming array */static void build_lp(blist_ptr lp, int reversed, dp_ptr CD, int b, int e, int row, int seed1, int seed2){ int i; lp->type = lp->block->type; if (reversed) { lp->x = seed2 - lp->block->e2; lp->last_row = seed1 - lp->block->b1; } else { lp->x = lp->block->b2 - seed2; lp->last_row = lp->block->e1 - seed1; } if (lp->type != HOR) { if (lp->x >= b && lp->x <= e) CD[lp->x].mask = row; } else { int end_hor = (reversed) ? seed2 - lp->block->b2 : lp->block->e2 - seed2; for (i = MAX(b,lp->x); i <= MIN(e,end_hor); i++) CD[i].mask = row; }}/* add_new_align -- add an alignment's terminal block to the blist */static blist_ptr add_new_align(blist_ptr list, Msp_ptr right_list, int reversed, dp_ptr CD, int b, int e, int row, int seed1, int seed2){ blist_ptr lp = (blist_ptr) ckalloc(sizeof(blist_node)); lp->filter = 0; if (reversed) lp->block = right_list->last_block; else lp->block = right_list->first_block; lp->next = list; build_lp(lp, reversed, CD, b, e, row, seed1, seed2); return lp;}/* filter_blist -- remove blist nodes with filter values != i */static void filter_blist(blist_ptr *list, int i){ blist_ptr pp, lp; for (pp = NULL, lp = *list; lp; ) if (lp->filter != i) { if (pp) { pp->next = lp->next; ckfree(lp); lp = pp->next; } else { *list = lp->next; ckfree(lp); lp = *list; } } else { pp = lp; lp = lp->next; }}static block_ptr next_block(block_ptr bp, Boolean reversed)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -