📄 bz_align.c
字号:
{ return (reversed ? bp->prev_block : bp->next_block);}static void update_blist(Boolean reversed, blist_ptr *plist, dp_ptr dp, int row, Msp_ptr *p_align_list, int begin, int end, int seed1, int seed2){ blist_ptr lp, list = *plist; Msp_ptr align_list = *p_align_list; for (lp = list; lp; lp = lp->next) { if (lp->type == HOR) fatal("Impossible HOR block."); if (lp->last_row >= row) { if (lp->type == DIA) ++(lp->x); if (lp->x >= begin && lp->x <= end) dp[lp->x].mask = row; } else if ((lp->block = next_block(lp->block, reversed)) != NULL) { build_lp(lp, reversed, dp, begin, end, row, seed1, seed2); if (lp->type == HOR) { lp->block = next_block(lp->block, reversed); build_lp(lp, reversed, dp, begin, end, row, seed1, seed2); } } else lp->filter = 1; } /* sweep row may have reached new alignments */ if (!reversed) { while (align_list && align_list->pos1 - seed1 == row) { list = add_new_align(list, align_list, reversed, dp, begin, end, row, seed1, seed2); align_list = align_list->next_msp; } } else { while (align_list && seed1 - align_list->end1 == row) { list = add_new_align(list, align_list, reversed, dp, begin, end, row, seed1, seed2); align_list = align_list->prev_msp; } } filter_blist(&list, 0); /* delete blocks whose filter is not 0 */ *plist = list; *p_align_list = align_list;}/* --------------------- procedure to do the real work ---------------------- */static int ALIGN_SMALL(uchar *A, uchar *B, int M, int N, edit_script_t *S, int *pend1, int *pend2, AlignIOPtr align_IO, int reversed){ int cb, row, col = 0, L, R, Lcol, Rcol, NN, gap_extend, c, d, i, m, dp_len, best_score, *wa, X, tback_len, Lcol_start, end1, end2, needed, seed1, seed2; dp_ptr dp, dq; dyn_prog *dyn_prog; static uchar **tback_row = 0; /* XXX - cached between calls - not reentrant!! */ uchar *tbp, *b; uchar st = 0; uchar k, s; int **matrix; TBack tback; blist_ptr list; block_ptr Up_block, Down_block; Msp_ptr align_list, Up_align, Down_align; if (N <= 0 || M <= 0) return *pend1 = *pend2 = 0; matrix = align_IO->subscore; gap_extend = align_IO->gap_extend; m = align_IO->gap_open + gap_extend; tback = align_IO->tback; tback_len = tback->len; X = align_IO->x_parameter; L = 0; R = N; seed1 = align_IO->seed1; seed2 = align_IO->seed2; Up_block = align_IO->Up_block; if (Up_block) { if (Up_block->type == DIA) R = Up_block->b2 + seed1 - Up_block->b1 - seed2; else R = Up_block->b2 - seed2; } Down_block = align_IO->Down_block; if (Down_block) { if (Down_block->type == DIA) L = Down_block->b2 + seed1 - Down_block->b1 - seed2; else L = Down_block->b2 - seed2; } if (reversed) { int temp = 0; /* silence compiler warning */ if (Down_block) temp = -L; if (Up_block) L = -R; if (Down_block) R = temp; } list = NULL; Up_align = align_IO->Up_align; Down_align = align_IO->Down_align; align_list = (reversed ? align_IO->Left_list : align_IO->Right_list); // tback_row = ckalloc(sizeof(uchar *)*(M+1)); // XXX - the following is a performance hack, to avoid expensive // - mmap/munmap activity in linux with gnu malloc. // - malloc should be smarter, in which case this would not be needed. // XXX - not reentrant! { unsigned int n = roundup(sizeof(uchar *)*(M+1), 64*1024); tback_row = ckrealloc(tback_row, n); } // This didn't actually help as much as I wanted: too many different // sizes. But I think mremap is cheaper than mmap/mfree, so keep it. tbp = tback_row[0] = tback->space; needed = X/gap_extend + 6; /* upper bound for 0-th row */ if (needed > tback_len) fatal("Too little space in trace_back array"); dyn_prog = ckallocz(sizeof(dp_node)); dp_ready(dyn_prog, col); dp_len = dyn_prog->len; dyn_prog->p[0].CC = 0; *(tbp++) = 0; c = dyn_prog->p[0].DD = -m; for (col = 1; col <= N && c >= -X; col++) { dyn_prog->p[col].CC = c; dyn_prog->p[col].DD = c - m; *(tbp++) = FLAG_I; c -= gap_extend; } Lcol = best_score = end1 = end2 = 0; Rcol = col; /* 1 column beyond the feasible region */ for (row = 1; row <= M; row++) { Lcol_start = Lcol; update_LR(&Up_block, &Down_block, row, &L, &R, &Lcol, &Rcol, seed1, seed2, reversed, &Up_align, &Down_align); update_blist(reversed, &list, dyn_prog->p - Lcol_start, row, &align_list, Lcol, Rcol, seed1, seed2); needed = Rcol - Lcol + X/gap_extend + 6; if (tbp - tback_row[0] + needed >= tback_len) { if (reversed) fprintf(stderr, "truncating alignment starting at (%d,%d);", seed1 + 2 - end1, seed2 + 2 - end2); else fprintf(stderr, "truncating alignment ending at (%d,%d);", end1 + seed1 + 1, end2 + seed2 + 1); fprintf(stderr, " seeded at (%d,%d)\n", seed1, seed2); goto back; } tback_row[row] = tbp - Lcol; wa = matrix[A[row]]; i = c = MININT; b = B + Lcol + 1; dp_ready(dyn_prog, needed); dp_len = dyn_prog->len; dq = dyn_prog->p; dp = dq + Lcol - Lcol_start; /* We will be taking old values from *dp and putting new ones * at *dq. If dq lags behind dp (because update_LR has raised * Lc) that only makes it easier to avoid overwriting CC * entries before they are used. */ for (col = cb = Lcol; col < Rcol && b < B + N; col++) { d = dp->DD; /* At this point, c, d and i hold the values reaching * nodes C, D and I along edges from previous grid * points. C also has edges entering it from the * current D and I nodes, so c may be improved. Also, * b, dq, dp and tbp point to values for the current * position; rather than incrementing them in the "for" * statement it is more efficient to ++ their last use. */ if (list && dp->mask == row) { PRUNE } if (c < d || c < i) { if (d < i) { c = i; st = FLAG_DD | FLAG_II | FLAG_I; } else { c = d; st = FLAG_DD | FLAG_II | FLAG_D; } if (c < best_score - X) { PRUNE } i -= gap_extend; /* for next grid pt in row */ dq->DD = d - gap_extend; /* d for next row */ } else { if (c < best_score - X) { PRUNE } if (c > best_score) { best_score = c; end1 = row; end2 = col; } if ((c -= m) > (d -= gap_extend)) { dq->DD = c; st = FLAG_C; } else { dq->DD = d; st = FLAG_DD | FLAG_C; } if (c > (i -= gap_extend)) i = c; else st |= FLAG_II; c += m; } cb = col; /* last non-pruned position */ /* don't overwrite CC too soon */ d = (dp++)->CC + wa[*(b++)]; (dq++)->CC = c; /* c for this position */ c = d; /* c for next position in this row */ *(tbp++) = st; } if (Lcol >= Rcol) break; NN = (Up_block ? R : N); if (cb < Rcol-1) Rcol = cb+1; else while (i >= best_score - X && Rcol <= NN) { assert(dq - dyn_prog->p < dp_len); dq->CC = i; (dq++)->DD = i - m; i -= gap_extend; *(tbp++) = FLAG_I; Rcol++; } if (Rcol <= NN) { assert(dq - dyn_prog->p < dp_len); dq->DD = dq->CC = MININT; Rcol++; } } back: *pend1 = row = end1; *pend2 = col = end2; for (s = 0; row >= 1 || col > 0 ; s = k) { st = tback_row[row][col]; k = st & SELECT_CID; if (s == FLAG_I && (st & FLAG_II)) k = FLAG_I; if (s == FLAG_D && (st & FLAG_DD)) k = FLAG_D; if (k == FLAG_I) { col--; edit_script_ins(S, 1); } else if (k == FLAG_D) { row--; edit_script_del(S, 1); } else { row--; col--; edit_script_rep(S, 1); } } filter_blist(&list, 2); /* removes everything */ // ckfree(tback_row); // XXX - static var ckfree(dyn_prog->p); ckfree(dyn_prog); return best_score;}void Align(AlignIOPtr align_IO){ int seed1, seed2, score_right, score_left, end1, end2; edit_script_t *ed_scr, *ed_scr1; if (align_IO == NULL) fatal("Align() called with NULL pointer."); seed1 = align_IO->seed1; seed2 = align_IO->seed2; ed_scr = edit_script_new(); score_left = ALIGN_SMALL( align_IO->seq1_r + align_IO->len1 - seed1 - 2, align_IO->seq2_r + align_IO->len2 - seed2 - 2, seed1 + 1, seed2 + 1, ed_scr, &end1, &end2, align_IO, TRUE); align_IO->start1 = seed1 + 1 - end1; align_IO->start2 = seed2 + 1 - end2; ed_scr1 = edit_script_new(); score_right = ALIGN_SMALL( align_IO->seq1 + seed1, align_IO->seq2 + seed2, align_IO->len1 - seed1 - 1, align_IO->len2 - seed2 - 1, ed_scr1, &end1, &end2, align_IO, FALSE); align_IO->stop1 = seed1 + end1; align_IO->stop2 = seed2 + end2; edit_script_reverse_inplace(ed_scr1); edit_script_append(ed_scr, ed_scr1); edit_script_free(ed_scr1); align_IO->score = score_right + score_left; align_IO->ed_scr = ed_scr;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -