📄 greedy_align.c
字号:
} else { for (row = 0; row < len2 && row < len1 && (s2[len2-1-row] == s1[len1-1-row]); row++) /*empty*/ ; } } else { if (!(rem & 4)) { for (row = 0; row < len2 && row < len1 && (s2[row] == NCBI2NA_UNPACK_BASE(s1[(row+rem)/4], 3-(row+rem)%4)); row++) /*empty*/ ; } else { for (row = 0; row < len2 && row < len1 && (s2[row] == s1[row]); row++) /*empty*/ ; } } *e1 = row; *e2 = row; if (row == len1) { if (S != NULL) edit_script_rep(S, row); /* hit last row; stop search */ return 0; } if (S==NULL) { space = 0; } else if (!space) { gamp->space = space = MBSpaceNew(); } else { refresh_mb_space(space); } max_row = max_row_free + D_diff; for (k = 0; k < D_diff; k++) max_row_free[k] = 0; flast_d[0][ORIGIN] = row; max_row[0] = (row + row)*M_half; flower = ORIGIN - 1; fupper = ORIGIN + 1; d = 1; while (d <= MAX_D) { Int4 fl0, fu0; flast_d[d - 1][flower - 1] = flast_d[d - 1][flower] = -1; flast_d[d - 1][fupper] = flast_d[d - 1][fupper + 1] = -1; x = max_row[d - D_diff] + Op_cost * d - X_pen; x = ICEIL(x, M_half); cur_max = 0; fl0 = flower; fu0 = fupper; for (k = fl0; k <= fu0; k++) { row = MAX(flast_d[d - 1][k + 1], flast_d[d - 1][k]) + 1; row = MAX(row, flast_d[d - 1][k - 1]); col = row + k - ORIGIN; if (row + col >= x) fupper = k; else { if (k == flower) flower++; else flast_d[d][k] = -1; continue; } if (row > max_len || row < 0) { flower = k+1; nlower = 1; } else { /* Slide down the diagonal. */ if (reverse) { if (!(rem & 4)) { while (row < len2 && col < len1 && s2[len2-1-row] == NCBI2NA_UNPACK_BASE(s1[(len1-1-col)/4], 3-(len1-1-col)%4)) { ++row; ++col; } } else { while (row < len2 && col < len1 && s2[len2-1-row] == s1[len1-1-col]) { ++row; ++col; } } } else { if (!(rem & 4)) { while (row < len2 && col < len1 && s2[row] == NCBI2NA_UNPACK_BASE(s1[(col+rem)/4], 3-(col+rem)%4)) { ++row; ++col; } } else { while (row < len2 && col < len1 && s2[row] == s1[col]) { ++row; ++col; } } } } flast_d[d][k] = row; if (row + col > cur_max) { cur_max = row + col; b_diag = k; } if (row == len2) { flower = k+1; nlower = 1; } if (col == len1) { fupper = k-1; nupper = 1; } } k = cur_max*M_half - d * Op_cost; if (max_row[d - 1] < k) { max_row[d] = k; return_val = d; best_diag = b_diag; *e2 = flast_d[d][b_diag]; *e1 = (*e2)+b_diag-ORIGIN; } else { max_row[d] = max_row[d - 1]; } if (flower > fupper) break; d++; if (!nlower) flower--; if (!nupper) fupper++; if (S==NULL) flast_d[d] = flast_d[d - 2]; else { /* space array consists of SThreeVal structures which are 3 times larger than Int4, so divide requested amount by 3 */ flast_d[d] = (Int4*) get_mb_space(space, (fupper-flower+7)/3); if (flast_d[d] != NULL) flast_d[d] = flast_d[d] - flower + 2; else return return_val; } } if (S!=NULL) { /*trace back*/ Int4 row1, col1, diag1, diag; d = return_val; diag = best_diag; row = *e2; col = *e1; while (d > 0) { diag1 = get_last(flast_d, d, diag, &row1); col1 = row1+diag1-ORIGIN; if (diag1 == diag) { if (row-row1 > 0) edit_script_rep(S, row-row1); } else if (diag1 < diag) { if (row-row1 > 0) edit_script_rep(S, row-row1); edit_script_ins(S,1); } else { if (row-row1-1> 0) edit_script_rep(S, row-row1-1); edit_script_del(S, 1); } d--; diag = diag1; col = col1; row = row1; } edit_script_rep(S, flast_d[0][ORIGIN]); if (!reverse) edit_script_reverse_inplace(S); } return return_val;}Int4 BLAST_AffineGreedyAlign (const Uint1* s1, Int4 len1, const Uint1* s2, Int4 len2, Boolean reverse, Int4 xdrop_threshold, Int4 match_score, Int4 mismatch_score, Int4 gap_open, Int4 gap_extend, Int4* e1, Int4* e2, SGreedyAlignMem* gamp, MBGapEditScript *S, Uint1 rem){ Int4 col, /* column number */ d, /* current distance */ k, /* current diagonal */ flower, fupper, /* boundaries for searching diagonals */ row, /* row number */ MAX_D, /* maximum cost */ ORIGIN, return_val = 0; SThreeVal** flast_d; /* rows containing the last d */ Int4* max_row_free = gamp->max_row_free; Int4* max_row; /* reached for cost d=0, ... len1. */ Int4 Mis_cost, GO_cost, GE_cost; Int4 D_diff, gd; Int4 M_half; Int4 max_cost; Int4 *lower, *upper; Int4 x, cur_max, b_diag = 0, best_diag = INT4_MAX/2; char nlower = 0, nupper = 0; SMBSpace* space = gamp->space; Int4 stop_condition; Int4 max_d; Int4* uplow_free; Int4 max_len = len2; if (match_score % 2 == 1) { match_score *= 2; mismatch_score *= 2; xdrop_threshold *= 2; gap_open *= 2; gap_extend *= 2; } M_half = match_score/2; if (gap_open == 0 && gap_extend == 0) { return BLAST_GreedyAlign(s1, len1, s2, len2, reverse, xdrop_threshold, match_score, mismatch_score, e1, e2, gamp, S, rem); } Mis_cost = mismatch_score + match_score; GO_cost = gap_open; GE_cost = gap_extend+M_half; gd = gdb3(&Mis_cost, &GO_cost, &GE_cost); D_diff = ICEIL(xdrop_threshold+M_half, gd); MAX_D = (Int4) (len1/ERROR_FRACTION + 1); max_d = MAX_D*GE_cost; ORIGIN = MAX_D + 2; max_cost = MAX(Mis_cost, GO_cost+GE_cost); *e1 = *e2 = 0; if (reverse) { if (!(rem & 4)) { for (row = 0; row < len2 && row < len1 && (s2[len2-1-row] == NCBI2NA_UNPACK_BASE(s1[(len1-1-row)/4], 3-(len1-1-row)%4)); row++) /*empty*/ ; } else { for (row = 0; row < len2 && row < len1 && (s2[len2-1-row] == s1[len1-1-row]); row++) /*empty*/ ; } } else { if (!(rem & 4)) { for (row = 0; row < len2 && row < len1 && (s2[row] == NCBI2NA_UNPACK_BASE(s1[(row+rem)/4], 3-(row+rem)%4)); row++) /*empty*/ ; } else { for (row = 0; row < len2 && row < len1 && (s2[row] == s1[row]); row++) /*empty*/ ; } } *e1 = row; *e2 = row; if (row == len1 || row == len2) { if (S != NULL) edit_script_rep(S, row); /* hit last row; stop search */ return row*match_score; } flast_d = gamp->flast_d_affine; if (S==NULL) { space = 0; } else if (!space) { gamp->space = space = MBSpaceNew(); } else { refresh_mb_space(space); } max_row = max_row_free + D_diff; for (k = 0; k < D_diff; k++) max_row_free[k] = 0; uplow_free = gamp->uplow_free; lower = uplow_free; upper = uplow_free+max_d+1+max_cost; /* next 3 lines set boundary for -1,-2,...,-max_cost+1*/ for (k = 0; k < max_cost; k++) {lower[k] =LARGE; upper[k] = -LARGE;} lower += max_cost; upper += max_cost; flast_d[0][ORIGIN].C = row; flast_d[0][ORIGIN].I = flast_d[0][ORIGIN].D = -2; max_row[0] = (row + row)*M_half; lower[0] = upper[0] = ORIGIN; flower = ORIGIN - 1; fupper = ORIGIN + 1; d = 1; stop_condition = 1; while (d <= max_d) { Int4 fl0, fu0; x = max_row[d - D_diff] + gd * d - xdrop_threshold; x = ICEIL(x, M_half); if (x < 0) x=0; cur_max = 0; fl0 = flower; fu0 = fupper; for (k = fl0; k <= fu0; k++) { row = -2; if (k+1 <= upper[d-GO_cost-GE_cost] && k+1 >= lower[d-GO_cost-GE_cost]) row = flast_d[d-GO_cost-GE_cost][k+1].C; if (k+1 <= upper[d-GE_cost] && k+1 >= lower[d-GE_cost] && row < flast_d[d-GE_cost][k+1].D) row = flast_d[d-GE_cost][k+1].D; row++; if (row+row+k-ORIGIN >= x) flast_d[d][k].D = row; else flast_d[d][k].D = -2; row = -1; if (k-1 <= upper[d-GO_cost-GE_cost] && k-1 >= lower[d-GO_cost-GE_cost]) row = flast_d[d-GO_cost-GE_cost][k-1].C; if (k-1 <= upper[d-GE_cost] && k-1 >= lower[d-GE_cost] && row < flast_d[d-GE_cost][k-1].I) row = flast_d[d-GE_cost][k-1].I; if (row+row+k-ORIGIN >= x) flast_d[d][k].I = row; else flast_d[d][k].I = -2; row = MAX(flast_d[d][k].I, flast_d[d][k].D); if (k <= upper[d-Mis_cost] && k >= lower[d-Mis_cost]) row = MAX(flast_d[d-Mis_cost][k].C+1,row); col = row + k - ORIGIN; if (row + col >= x) fupper = k; else { if (k == flower) flower++; else flast_d[d][k].C = -2; continue; } if (row > max_len || row < -2) { flower = k; nlower = k+1; } else { /* slide down the diagonal */ if (reverse) { if (!(rem & 4)) { while (row < len2 && col < len1 && s2[len2-1-row] == NCBI2NA_UNPACK_BASE(s1[(len1-1-col)/4], 3-(len1-1-col)%4)) { ++row; ++col; } } else { while (row < len2 && col < len1 && s2[len2-1-row] == s1[len1-1-col]) { ++row; ++col; } } } else { if (!(rem & 4)) { while (row < len2 && col < len1 && s2[row] == NCBI2NA_UNPACK_BASE(s1[(col+rem)/4], 3-(col+rem)%4)) { ++row; ++col; } } else { while (row < len2 && col < len1 && s2[row] == s1[col]) { ++row; ++col; } } } } flast_d[d][k].C = row; if (row + col > cur_max) { cur_max = row + col; b_diag = k; } if (row == len2) { flower = k; nlower = k+1; } if (col == len1) { fupper = k; nupper = k-1; } } k = cur_max*M_half - d * gd; if (max_row[d - 1] < k) { max_row[d] = k; return_val = d; best_diag = b_diag; *e2 = flast_d[d][b_diag].C; *e1 = (*e2)+b_diag-ORIGIN; } else { max_row[d] = max_row[d - 1]; } if (flower <= fupper) { stop_condition++; lower[d] = flower; upper[d] = fupper; } else { lower[d] = LARGE; upper[d] = -LARGE; } if (lower[d-max_cost] <= upper[d-max_cost]) stop_condition--; if (stop_condition == 0) break; d++; flower = MIN(lower[d-Mis_cost], MIN(lower[d-GO_cost-GE_cost], lower[d-GE_cost])-1); if (nlower) flower = MAX(flower, nlower); fupper = MAX(upper[d-Mis_cost], MAX(upper[d-GO_cost-GE_cost], upper[d-GE_cost])+1); if (nupper) fupper = MIN(fupper, nupper); if (d > max_cost) { if (S==NULL) { /*if (d > max_cost)*/ flast_d[d] = flast_d[d - max_cost-1]; } else { flast_d[d] = get_mb_space(space, fupper-flower+1)-flower; if (flast_d[d] == NULL) return return_val; } } } if (S!=NULL) { /*trace back*/ Int4 row1, diag, state; d = return_val; diag = best_diag; row = *e2; state = sC; while (d > 0) { if (state == sC) { /* diag will not be changed*/ state = get_lastC(flast_d, lower, upper, &d, diag, Mis_cost, &row1); if (row-row1 > 0) edit_script_rep(S, row-row1); row = row1; } else { if (state == sI) { /*row unchanged */ state = get_lastI(flast_d, lower, upper, &d, diag, GO_cost, GE_cost); diag--; edit_script_ins(S,1); } else { edit_script_del(S,1); state = get_lastD(flast_d, lower, upper, &d, diag, GO_cost, GE_cost); diag++; row--; } } } edit_script_rep(S, flast_d[0][ORIGIN].C); if (!reverse) edit_script_reverse_inplace(S); } return_val = max_row[return_val]; return return_val;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -