📄 lsim4.c
字号:
/* lsim4.c - calculate non-overlapping local alignments derived from lsim2.c from Webb Miller*//* $Name: fa35_03_06 $ - $Id: lsim4.c,v 1.2 2007/06/29 20:23:58 wrp Exp $ *//* March 2007 - modified to avoid global references */#include <stdio.h>#include <stdlib.h>#include <string.h>#include "defs.h"#include "param.h"#include "lsim4.h"/* SIM(A,B,M,N,mini_score,Q,R) reports best non-intersecting alignments with score >= mini_score of the segments of A and B in order of similarity scores, where pam2[a][b] is the score of aligning a and b, and -(Q+R*i) is the score of an i-symbol indel. *//* SIM uses A[1..M], B[1..N] FORTRAN indexing */void SIM(const unsigned char *A, /* seq1 indexed A[1..M] */ const unsigned char *B, /* seq2 indexed B[1..N] */ int M, int N, /* len seq1, seq2 */ struct pstruct *ppst, /* parameters */ int nseq, /* nseq - number of different sequences */ int mini_score, /* cut-off score */ int max_count, /* number of alignments */ struct a_res_str *a_res) /* alignment result structure */{ int endi, endj, stari, starj; /* endpoint and startpoint */ int score; /* the max score in LIST */ int ck; int i; /* row and column indices */ int flag; struct a_res_str *cur_ares, *tmp_ares; bool first_pass; int q, r, qr; int *sapp, last; struct vert_str vLIST; struct l_struct *l_ptr; pair_ptr z; int count; /* maximum size of list */ vertex_p v_cur; /* temporary pointer */ /* allocate space for all vectors */ l_ptr = (struct l_struct *)ckalloc(1, sizeof(struct l_struct)); l_ptr->CCC = ( space_ptr ) ckalloc(N+1,sizeof(space)); l_ptr->r_ss = (struct lrr_str *) ckalloc(N + 1,sizeof(struct lrr_str)); l_ptr->c_ss = (struct lcc_str *) ckalloc(M + 1, sizeof(struct lcc_str)); l_ptr->row = ( pair_ptr * ) ckalloc( M + 1, sizeof(pair_ptr)); /* set up list for each row */ if ( nseq == 2 ) { for ( i = 1; i <= M; i++ ) {l_ptr->row[i] = 0;} } else { l_ptr->row[0] = l_ptr->row[1] = z = (pair_ptr) ckalloc(M, sizeof(pair)); z->COL = 1; z->NEXT = 0; for ( i = 2; i <= M; i++ ) { l_ptr->row[i] = z = l_ptr->row[i-1] + 1; z->COL = i; z->NEXT = 0; } }#ifdef OLD_FASTA_GAP q = -(ppst->gdelval - ppst->ggapval);#else q = -ppst->gdelval;#endif r = -ppst->ggapval; qr = q + r; vLIST.LIST = vLIST.most = NULL; vLIST.numnode = 0; /* fill in l_ptr->CCC and vLIST */ big_pass(A,B,M,N,mini_score,ppst->pam2[0],q,r,nseq, &vLIST, l_ptr); first_pass= 1; /* Report the K best alignments one by one. After each alignment is output, recompute part of the matrix. First determine the size of the area to be recomputed, then do the recomputation */ count = 0; while (count < max_count) { if ( vLIST.numnode == 0 ) break; /* no more alignments */ if (first_pass) { cur_ares = a_res; } else { /* need a new a_res */ tmp_ares = (struct a_res_str *)ckalloc(1, sizeof(struct a_res_str)); cur_ares->next = tmp_ares; cur_ares = tmp_ares; } /* get the best next alignment */ v_cur = findmax(&vLIST); score = v_cur->SCORE; stari = ++v_cur->STARI; starj = ++v_cur->STARJ; endi = v_cur->ENDI; endj = v_cur->ENDJ; l_ptr->m1 = v_cur->TOP; l_ptr->mm = v_cur->BOT; l_ptr->n1 = v_cur->LEFT; l_ptr->nn = v_cur->RIGHT; l_ptr->rl = endi - stari + 1; l_ptr->cl = endj - starj + 1; l_ptr->I = stari - 1; l_ptr->J = starj - 1; /* minimum allocation for alignment */ sapp = cur_ares->res =(int *)ckalloc(2*min(l_ptr->rl,l_ptr->cl), sizeof(int)); last = 0; cur_ares->sw_score = score; cur_ares->min0 = stari-1; cur_ares->min1 = starj-1; cur_ares->max0 = stari+l_ptr->rl-1; cur_ares->max1 = starj+l_ptr->cl-1; cur_ares->next = NULL; /* produce an alignment, encoded in sapp - equivalent to "align() in dropgsw.c" */ (void) diff(&A[stari]-1, &B[starj]-1, l_ptr->rl, l_ptr->cl, q, q, ppst->pam2[0], q, r, &sapp, &last, l_ptr); ck = CHECK_SCORE(&A[stari]-1,&B[starj]-1,l_ptr->rl,l_ptr->cl, cur_ares->res,ppst->pam2[0],q,r,&cur_ares->nres);#ifdef DEBUG /* the same errors are produced by Miller and Huang's sim96 code, so I hope this reflects a mistake in CHECK_SCORE */ if (score != ck) { fprintf(stderr,"*** Check_score error: orig %d != %d recons ***\n aa0[%d-%d] : aa1[%d-%d]\n", score,ck, cur_ares->min0, cur_ares->max0, cur_ares->min1, cur_ares->max1); }#endif free(v_cur); flag = 0; if (first_pass && maxi(l_ptr->rl, l_ptr->cl) > maxi(M,N)/4) { /*printf("no locate\n");*/ flag = 1; l_ptr->n1 = l_ptr->m1 = 0; } else { locate(A,B,mini_score,ppst->pam2[0],q,r, nseq, &flag, &vLIST, l_ptr); } if ( flag ) { /*printf("small pass\n");*/ small_pass(A,B,mini_score,ppst->pam2[0],q,r, nseq, &vLIST, l_ptr); } first_pass= 0; count++; } /* start cleaning up */ while (vLIST.numnode > 0) { v_cur = findmax(&vLIST); if (v_cur) free(v_cur); } if (nseq == 1) { free(l_ptr->row[0]); } free(l_ptr->row); free(l_ptr->c_ss); free(l_ptr->r_ss); free(l_ptr->CCC); free(l_ptr);}/* A big pass to compute classes scoring over K *//* fills in the l_ptr->CCC structure for further analysis *//* adds nodes to v_ptr->LIST when appropriate *//* does not produce alignments */static voidbig_pass(const unsigned char *A, const unsigned char *B, int M, int N, int mini_score, int **pam2, int Q, int R, int nseq, struct vert_str *v_ptr, struct l_struct *l_ptr){ int i, j; /* row and column indices */ int c; /* best score at current point */ int f; /* best score ending with insertion */ int d; /* best score ending with deletion */ int p; /* best score at (i-1, j-1) */ int ci, cj; /* end-point associated with c */ int fi, fj; /* end-point associated with f */ int pi, pj; /* end-point associated with p */ space_ptr sp; pair_ptr z; int qr; int *va; /* pointer to v(A[i], B[j]) */ qr = Q+R; /* Compute the matrix and save the best scores in LIST CC : the scores of the current row RR and EE : the starting point that leads to score CC DD : the scores of the current row, ending with deletion SS and FF : the starting point that leads to score DD */ /* Initialize the 0 th row */ for ( sp=&l_ptr->CCC[1], j = 1; j <= N; j++, sp++ ) { sp->CC = sp->RR = 0; sp->EE = j; sp->DD = - (qr); sp->SS = 1; sp->FF = j; } for ( i = 1; i <= M; i++) { c = 0; /* Initialize column 0 */ f = - (qr); va = pam2[A[i]]; ci = fi = i; if ( nseq == 2 ) { p = 0; pi = (i - 1); cj = fj = pj = 0; } else { p = l_ptr->CCC[i].CC; pi = l_ptr->CCC[i].RR; pj = l_ptr->CCC[i].EE; cj = fj = i; } j = (nseq == 2 ? 1: i+1); for ( sp = &l_ptr->CCC[j]; sp <= &l_ptr->CCC[N]; j++, sp++) { d = sp->DD; c = -1; /* assign p+va[B[j]] to c if i, j not part of path */ DIAG(i, j, c, p+va[B[j]]) /* diagonal */ if (c < 0) { p = sp->CC; pi = sp->RR; pj = sp->EE; if (f >= 0) { c = f; ci = fi; cj = fj; /* replace ci, cj with sp->SS, sp->FF if c < d */ ORDER1(c, ci, cj, d, sp->SS, sp->FF) sp->CC = c; sp->RR = ci; sp->EE = cj; sp->DD -= R; f-=R; } else if (d >= 0) { sp->CC = d; sp->RR = sp->SS; sp->EE = sp->FF; sp->DD -= R; } else { sp->CC = 0; sp->RR=i; sp->EE = j; } } else { ci = pi; cj = pj; ORDER1(c, ci, cj, f, fi, fj) ORDER1(c, ci, cj, d, sp->SS, sp->FF) p = sp->CC; sp->CC = c; pi = sp->RR; sp->RR = ci; pj = sp->EE; sp->EE = cj; f -= R; if (c >= qr) { if ( c > mini_score) { /* add the score into list */ addnode(c, ci, cj, i, j, v_ptr); } d -= R; c -= qr; ORDER1(f, fi, fj, c, ci, cj) ORDER1(d, sp->SS, sp->FF, c, ci, cj) sp->DD = d; } else { sp->DD -= R; } } } }}/* Determine the left and top boundaries of the recomputed area *//* this function is not recursive */static voidlocate(const unsigned char *A, const unsigned char *B, int mini_score, int **pam2, int Q, int R, int nseq, int *flag_p, struct vert_str *v_ptr, struct l_struct *l_ptr) { int i, j; /* row and column indices */ int c; /* best score at current point */ int f; /* best score ending with insertion */ int d; /* best score ending with deletion */ int p; /* best score at (i-1, j-1) */ int ci, cj; /* end-point associated with c */ int di, dj; int fi, fj; /* end-point associated with f */ int pi, pj; /* end-point associated with p */ space_ptr sp; pair_ptr z; bool cflag, rflag; /* for recomputation */ int *va; /* pointer to v(A[i], B[j]) */ int limit; /* the bound on j */ int qr; qr = Q + R; /* Reverse pass rows come from CCC CC : the scores on the current row RR and EE : the endpoints that lead to CC DD : the deletion scores SS and FF : the endpoints that lead to DD columns come from c_ss[] HH : the scores on the current columns II and JJ : the endpoints that lead to HH WW : the deletion scores XX and YY : the endpoints that lead to WW */ for ( j = l_ptr->nn; j >= l_ptr->n1 ; j-- ) { l_ptr->CCC[j].CC = 0; l_ptr->CCC[j].EE = j; l_ptr->CCC[j].DD = - (Q); l_ptr->CCC[j].FF = j; if ( nseq == 2 || j > l_ptr->mm ) l_ptr->CCC[j].RR = l_ptr->CCC[j].SS = l_ptr->mm + 1; else l_ptr->CCC[j].RR = l_ptr->CCC[j].SS = j; } for ( i = l_ptr->mm; i >= l_ptr->m1; i-- ) { c = p = 0; f = - (Q); ci = fi = i; pi = i + 1; cj = fj = pj = l_ptr->nn + 1; va = pam2[A[i]]; if ( nseq == 2 || l_ptr->n1 > i ) limit = l_ptr->n1; else limit = i + 1; for ( j = l_ptr->nn, sp = &l_ptr->CCC[j]; j >= limit ; j--, sp-- ) { f = f - R; c = c - qr; ORDER(f, fi, fj, c, ci, cj) c = sp->CC - qr; d = sp->DD - R; ORDER(d, sp->SS, sp->FF, c, sp->RR, sp->EE) c = 0; DIAG(i, j, c, p+va[B[j]]) /* diagonal */ if ( c <= 0 ) { c = 0; ci = i; cj = j; } else { ci = pi; cj = pj; } ORDER1(c, ci, cj, d, sp->SS, sp->FF) ORDER1(c, ci, cj, f, fi, fj) p = sp->CC; sp->CC = c; pi = sp->RR; pj = sp->EE; sp->RR = ci; sp->EE = cj; sp->DD = d; if ( c > mini_score ) *flag_p = 1; } if ( nseq == 2 || i < l_ptr->n1 ) { l_ptr->c_ss[i].HH = l_ptr->CCC[l_ptr->n1].CC; l_ptr->c_ss[i].II = l_ptr->CCC[l_ptr->n1].RR; l_ptr->c_ss[i].JJ = l_ptr->CCC[l_ptr->n1].EE; l_ptr->c_ss[i].WW = f; l_ptr->c_ss[i].XX = fi; l_ptr->c_ss[i].YY = fj; } } for ( l_ptr->rl = l_ptr->m1, l_ptr->cl = l_ptr->n1; ; ) { for ( rflag = cflag = 1; ( rflag && l_ptr->m1 > 1 ) || ( cflag && l_ptr->n1 > 1 ) ; ) { if ( rflag && l_ptr->m1 > 1 ) { /* Compute one row */ rflag = 0; l_ptr->m1--; c = p = 0; f = - (Q); ci = fi = l_ptr->m1; pi = l_ptr->m1 + 1; cj = fj = pj = l_ptr->nn + 1; va = pam2[A[l_ptr->m1]]; for ( j = l_ptr->nn, sp = &l_ptr->CCC[j]; j >= l_ptr->n1 ; j--, sp-- ) { f = f - R; c = c - qr; ORDER(f, fi, fj, c, ci, cj) c = sp->CC - qr; ci = sp->RR; cj = sp->EE; d = sp->DD - R; di = sp->SS; dj = sp->FF; ORDER(d, di, dj, c, ci, cj) c = 0; DIAG(l_ptr->m1, j, c, p+va[B[j]]) /* diagonal */ if ( c <= 0 ) { c = 0; ci = l_ptr->m1; cj = j; } else { ci = pi; cj = pj; } ORDER1(c, ci, cj, d, di, dj) ORDER1(c, ci, cj, f, fi, fj) sp->SS = di; sp->FF = dj; p = sp->CC; sp->CC = c; pi = sp->RR; pj = sp->EE; sp->RR = ci; sp->EE = cj; sp->DD = d; if ( c > mini_score ) *flag_p = 1; if ( ! rflag && ( (ci > l_ptr->rl && cj > l_ptr->cl) || (di > l_ptr->rl && dj > l_ptr->cl) || (fi > l_ptr->rl && fj > l_ptr->cl) ) ) rflag = 1; } l_ptr->c_ss[l_ptr->m1].HH = l_ptr->CCC[l_ptr->n1].CC; l_ptr->c_ss[l_ptr->m1].II = l_ptr->CCC[l_ptr->n1].RR; l_ptr->c_ss[l_ptr->m1].JJ = l_ptr->CCC[l_ptr->n1].EE; l_ptr->c_ss[l_ptr->m1].WW = f; l_ptr->c_ss[l_ptr->m1].XX = fi; l_ptr->c_ss[l_ptr->m1].YY = fj; if ( ! cflag && ( (ci > l_ptr->rl && cj > l_ptr->cl) || (di > l_ptr->rl && dj > l_ptr->cl) || (fi > l_ptr->rl && fj > l_ptr->cl ) )) cflag = 1; } if ( nseq == 1 && l_ptr->n1 == (l_ptr->m1 + 1) && ! rflag ) cflag = 0; if ( cflag && l_ptr->n1 > 1 ) { /* Compute one column */ cflag = 0; l_ptr->n1--; c = 0; f = - (Q); cj = fj = l_ptr->n1; va = pam2[B[l_ptr->n1]]; if ( nseq == 2 || l_ptr->mm < l_ptr->n1 ) { p = 0; ci = fi = pi = l_ptr->mm + 1; pj = l_ptr->n1 + 1; limit = l_ptr->mm; } else { p = l_ptr->c_ss[l_ptr->n1].HH; pi = l_ptr->c_ss[l_ptr->n1].II;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -