📄 lsim4.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*  lsim4.c - calculate non-overlapping local alignments    derived from lsim2.c from Webb Miller*//* $Name: fa35_03_06 $ - $Id: lsim4.c,v 1.2 2007/06/29 20:23:58 wrp Exp $ *//*  March 2007 - modified to avoid global references  */#include <stdio.h>#include <stdlib.h>#include <string.h>#include "defs.h"#include "param.h"#include "lsim4.h"/* SIM(A,B,M,N,mini_score,Q,R) reports best non-intersecting alignments with   score >= mini_score of the segments of A and B in order of similarity   scores, where pam2[a][b] is the score of aligning a and b, and   -(Q+R*i) is the score of an i-symbol indel.  *//* SIM uses A[1..M], B[1..N] FORTRAN indexing */void SIM(const unsigned char *A, /* seq1 indexed A[1..M] */	 const unsigned char *B, /* seq2 indexed B[1..N] */	 int M, int N,		 /* len seq1, seq2 */	 struct pstruct *ppst,	/* parameters */	 int nseq,		 /* nseq - number of different sequences */	 int mini_score,	 /* cut-off score */	 int max_count,		 /* number of alignments */	 struct a_res_str *a_res)	/* alignment result structure */{  int endi, endj, stari, starj;	 /* endpoint and startpoint */   int score;  			 /* the max score in LIST */  int ck;  int  i;		 /* row and column indices */  int flag;  struct a_res_str *cur_ares, *tmp_ares;  bool first_pass;			  int q, r, qr;  int *sapp, last;  struct vert_str vLIST;  struct l_struct *l_ptr;  pair_ptr z;  int count;			 /* maximum size of list */	  vertex_p v_cur;		 /* temporary pointer */  /* allocate space for all vectors */  l_ptr = (struct l_struct *)ckalloc(1, sizeof(struct l_struct));  l_ptr->CCC = ( space_ptr ) ckalloc(N+1,sizeof(space));  l_ptr->r_ss = (struct lrr_str *) ckalloc(N + 1,sizeof(struct lrr_str));  l_ptr->c_ss = (struct lcc_str *) ckalloc(M + 1, sizeof(struct lcc_str));  l_ptr->row = ( pair_ptr * ) ckalloc( M + 1, sizeof(pair_ptr));  /* set up list for each row */  if ( nseq == 2 ) {    for ( i = 1; i <= M; i++ ) {l_ptr->row[i] = 0;}  }  else {    l_ptr->row[0] = l_ptr->row[1] = z = (pair_ptr) ckalloc(M, sizeof(pair));    z->COL = 1;    z->NEXT = 0;    for ( i = 2; i <= M; i++ ) {      l_ptr->row[i] = z = l_ptr->row[i-1] + 1;      z->COL = i;			      z->NEXT = 0;    }  }#ifdef OLD_FASTA_GAP  q = -(ppst->gdelval - ppst->ggapval);#else  q = -ppst->gdelval;#endif  r = -ppst->ggapval;  qr = q + r;  vLIST.LIST = vLIST.most = NULL;  vLIST.numnode = 0;  /* fill in l_ptr->CCC and vLIST */  big_pass(A,B,M,N,mini_score,ppst->pam2[0],q,r,nseq, &vLIST, l_ptr);  first_pass= 1;  /* Report the K best alignments one by one. After each alignment is     output, recompute part of the matrix. First determine the size     of the area to be recomputed, then do the recomputation         */  count = 0;  while (count < max_count) {    if ( vLIST.numnode == 0 ) break;	/* no more alignments */    if (first_pass) {      cur_ares = a_res;    }    else {	/* need a new a_res */      tmp_ares = (struct a_res_str *)ckalloc(1, sizeof(struct a_res_str));      cur_ares->next = tmp_ares;      cur_ares = tmp_ares;    }    /* get the best next alignment */    v_cur = findmax(&vLIST);    score = v_cur->SCORE;    stari = ++v_cur->STARI;    starj = ++v_cur->STARJ;    endi = v_cur->ENDI;    endj = v_cur->ENDJ;    l_ptr->m1 = v_cur->TOP;    l_ptr->mm = v_cur->BOT;    l_ptr->n1 = v_cur->LEFT;    l_ptr->nn = v_cur->RIGHT;    l_ptr->rl = endi - stari + 1;    l_ptr->cl = endj - starj + 1;    l_ptr->I = stari - 1;    l_ptr->J = starj - 1;    /* minimum allocation for alignment */    sapp = cur_ares->res =(int *)ckalloc(2*min(l_ptr->rl,l_ptr->cl), sizeof(int));    last = 0;    cur_ares->sw_score = score;    cur_ares->min0 = stari-1;    cur_ares->min1 = starj-1;    cur_ares->max0 = stari+l_ptr->rl-1;    cur_ares->max1 = starj+l_ptr->cl-1;    cur_ares->next = NULL;    /* produce an alignment, encoded in sapp - equivalent to "align() in dropgsw.c" */    (void) diff(&A[stari]-1, &B[starj]-1, l_ptr->rl, l_ptr->cl,		q, q, ppst->pam2[0], q, r, &sapp, &last, l_ptr);    ck = CHECK_SCORE(&A[stari]-1,&B[starj]-1,l_ptr->rl,l_ptr->cl,		     cur_ares->res,ppst->pam2[0],q,r,&cur_ares->nres);#ifdef DEBUG    /* the same errors are produced by Miller and Huang's sim96 code, so I hope        this reflects a mistake in CHECK_SCORE */    if (score != ck) {      fprintf(stderr,"*** Check_score error: orig %d != %d recons ***\n aa0[%d-%d] : aa1[%d-%d]\n",	      score,ck, cur_ares->min0, cur_ares->max0, cur_ares->min1, cur_ares->max1);    }#endif    free(v_cur);    flag = 0;    if (first_pass && maxi(l_ptr->rl, l_ptr->cl) > maxi(M,N)/4) {      /*printf("no locate\n");*/      flag = 1; l_ptr->n1 = l_ptr->m1 = 0;     }     else {      locate(A,B,mini_score,ppst->pam2[0],q,r, nseq, &flag, &vLIST, l_ptr);    }    if ( flag ) {      /*printf("small pass\n");*/      small_pass(A,B,mini_score,ppst->pam2[0],q,r, nseq, &vLIST, l_ptr);    }    first_pass= 0;    count++;  }  /* start cleaning up */  while (vLIST.numnode > 0) {    v_cur = findmax(&vLIST);    if (v_cur) free(v_cur);  }  if (nseq == 1) {    free(l_ptr->row[0]);  }  free(l_ptr->row);  free(l_ptr->c_ss);  free(l_ptr->r_ss);  free(l_ptr->CCC);  free(l_ptr);}/* A big pass to compute classes scoring over K *//* fills in the l_ptr->CCC structure for further analysis *//* adds nodes to v_ptr->LIST when appropriate *//* does not produce alignments */static voidbig_pass(const unsigned char *A,	 const unsigned char *B,	 int M, int N,	 int mini_score,	 int **pam2,	 int Q, int R,	 int nseq,	 struct vert_str *v_ptr,	 struct l_struct *l_ptr){  int  i, j;		/* row and column indices */  int  c;		/* best score at current point */  int  f;		/* best score ending with insertion */  int  d;		/* best score ending with deletion */  int  p;		/* best score at (i-1, j-1) */  int  ci, cj;		/* end-point associated with c */  int  fi, fj;		/* end-point associated with f */  int  pi, pj;		/* end-point associated with p */  space_ptr sp;  pair_ptr z;  int qr;  int  *va;				/* pointer to v(A[i], B[j]) */  qr = Q+R;  /* Compute the matrix and save the best scores in LIST     CC : the scores of the current row     RR and EE : the starting point that leads to score CC     DD : the scores of the current row, ending with deletion     SS and FF : the starting point that leads to score DD  */  /* Initialize the 0 th row */  for ( sp=&l_ptr->CCC[1], j = 1; j <= N; j++, sp++ ) {    sp->CC = sp->RR = 0;    sp->EE = j;    sp->DD = - (qr);    sp->SS = 1;    sp->FF = j;  }  for ( i = 1; i <= M; i++) {    c = 0;				/* Initialize column 0 */    f = - (qr);    va = pam2[A[i]];    ci = fi = i;    if ( nseq == 2 ) {      p = 0;      pi = (i - 1);      cj = fj = pj = 0;    }    else {      p = l_ptr->CCC[i].CC;      pi = l_ptr->CCC[i].RR;      pj = l_ptr->CCC[i].EE;      cj = fj = i;    }    j = (nseq == 2 ? 1: i+1);    for ( sp = &l_ptr->CCC[j]; sp <= &l_ptr->CCC[N]; j++, sp++) {		         d = sp->DD;      c = -1;      /* assign p+va[B[j]] to c if i, j not part of path */      DIAG(i, j, c, p+va[B[j]])		/* diagonal */      if (c < 0) {	p = sp->CC; pi = sp->RR; pj = sp->EE;	if (f >= 0) {	  c = f; ci = fi; cj = fj;	  /* replace ci, cj with sp->SS, sp->FF if c < d */	  ORDER1(c, ci, cj,  d, sp->SS, sp->FF)	  sp->CC = c; sp->RR = ci; sp->EE = cj;	  sp->DD -= R; f-=R;	} else if (d >= 0) {	  sp->CC = d; sp->RR = sp->SS; sp->EE = sp->FF;  	  sp->DD -= R;	} else {	  sp->CC = 0; sp->RR=i; sp->EE = j;	}      } else { 	ci = pi; cj = pj;	ORDER1(c, ci, cj,  f, fi, fj)	ORDER1(c, ci, cj,  d, sp->SS, sp->FF)	p = sp->CC;	sp->CC = c;	pi = sp->RR;	sp->RR = ci;	pj = sp->EE;	sp->EE = cj;	f -= R;	if (c >= qr) {	  if ( c > mini_score) {	/* add the score into list */	    addnode(c, ci, cj, i, j, v_ptr);	  }	  d -= R; c -= qr;	  ORDER1(f, fi, fj, c, ci, cj)	  ORDER1(d, sp->SS, sp->FF, c, ci, cj)	  sp->DD = d;	} else {	  sp->DD -= R;	}      }    }  }}/* Determine the left and top boundaries of the recomputed area *//* this function is not recursive */static voidlocate(const unsigned char *A,       const unsigned char *B,       int mini_score,       int **pam2, int Q, int R,       int nseq,       int *flag_p,       struct vert_str *v_ptr,       struct l_struct *l_ptr) {  int  i, j;		/* row and column indices */  int  c;		/* best score at current point */  int  f;		/* best score ending with insertion */  int  d;		/* best score ending with deletion */  int  p;		/* best score at (i-1, j-1) */  int  ci, cj;		/* end-point associated with c */   int  di, dj;  int  fi, fj;		/* end-point associated with f */  int  pi, pj;		/* end-point associated with p */  space_ptr sp;  pair_ptr z;  bool  cflag, rflag;			/* for recomputation */  int  *va;				/* pointer to v(A[i], B[j]) */  int  limit;				/* the bound on j */  int qr;   qr = Q + R;  /* Reverse pass     rows come from CCC     CC : the scores on the current row     RR and EE : the endpoints that lead to CC     DD : the deletion scores      SS and FF : the endpoints that lead to DD     columns come from c_ss[]     HH : the scores on the current columns     II and JJ : the endpoints that lead to HH     WW : the deletion scores     XX and YY : the endpoints that lead to WW  */  for ( j = l_ptr->nn; j >= l_ptr->n1 ; j-- ) {    l_ptr->CCC[j].CC = 0;    l_ptr->CCC[j].EE = j;    l_ptr->CCC[j].DD = - (Q);    l_ptr->CCC[j].FF = j;    if ( nseq == 2 || j > l_ptr->mm )      l_ptr->CCC[j].RR = l_ptr->CCC[j].SS = l_ptr->mm + 1;    else      l_ptr->CCC[j].RR = l_ptr->CCC[j].SS = j;  }  for ( i = l_ptr->mm; i >= l_ptr->m1; i-- )  {    c = p = 0;    f = - (Q);    ci = fi = i;    pi = i + 1;    cj = fj = pj = l_ptr->nn + 1;    va = pam2[A[i]];    if ( nseq == 2 || l_ptr->n1 > i ) limit = l_ptr->n1;    else limit = i + 1;    for ( j = l_ptr->nn, sp = &l_ptr->CCC[j]; j >= limit ; j--, sp-- ) {      f = f - R;      c = c - qr;      ORDER(f, fi, fj, c, ci, cj)      c = sp->CC - qr;       d = sp->DD - R;      ORDER(d, sp->SS, sp->FF, c, sp->RR, sp->EE)      c = 0;      DIAG(i, j, c, p+va[B[j]])		/* diagonal */      if ( c <= 0 ) { c = 0; ci = i; cj = j; }      else  { ci = pi; cj = pj; }      ORDER1(c, ci, cj, d, sp->SS, sp->FF)      ORDER1(c, ci, cj, f, fi, fj)      p = sp->CC;      sp->CC = c;      pi = sp->RR;      pj = sp->EE;      sp->RR = ci;      sp->EE = cj;      sp->DD = d;      if ( c > mini_score ) *flag_p = 1;    }    if ( nseq == 2 || i < l_ptr->n1 ) {      l_ptr->c_ss[i].HH = l_ptr->CCC[l_ptr->n1].CC;      l_ptr->c_ss[i].II = l_ptr->CCC[l_ptr->n1].RR;      l_ptr->c_ss[i].JJ = l_ptr->CCC[l_ptr->n1].EE;      l_ptr->c_ss[i].WW = f;      l_ptr->c_ss[i].XX = fi;      l_ptr->c_ss[i].YY = fj;    }  }        for ( l_ptr->rl = l_ptr->m1, l_ptr->cl = l_ptr->n1; ; ) {    for ( rflag = cflag = 1; ( rflag && l_ptr->m1 > 1 ) || ( cflag && l_ptr->n1 > 1 ) ;  ) {      if ( rflag && l_ptr->m1 > 1 ) {	/* Compute one row */	rflag = 0;	l_ptr->m1--;	c = p = 0;	f = - (Q);	ci = fi = l_ptr->m1;	pi = l_ptr->m1 + 1;	cj = fj = pj = l_ptr->nn + 1;	va = pam2[A[l_ptr->m1]];	for ( j = l_ptr->nn, sp = &l_ptr->CCC[j]; j >= l_ptr->n1 ; j--, sp-- ) {	  f = f - R;	  c = c - qr;	  ORDER(f, fi, fj, c, ci, cj)	  c = sp->CC - qr; 	  ci = sp->RR;	  cj = sp->EE;	  d = sp->DD - R;	  di = sp->SS;	  dj = sp->FF;	  ORDER(d, di, dj, c, ci, cj)          c = 0;	  DIAG(l_ptr->m1, j, c, p+va[B[j]])		/* diagonal */	  if ( c <= 0 ) { c = 0; ci = l_ptr->m1; cj = j; }	  else { ci = pi; cj = pj; }	  ORDER1(c, ci, cj, d, di, dj)	  ORDER1(c, ci, cj, f, fi, fj)	  sp->SS = di;	  sp->FF = dj;	  p = sp->CC;	  sp->CC = c;	  pi = sp->RR;	  pj = sp->EE;	  sp->RR = ci;	  sp->EE = cj;	  sp->DD = d;	  if ( c > mini_score ) *flag_p = 1;	  if ( ! rflag && ( (ci > l_ptr->rl && cj > l_ptr->cl) || (di > l_ptr->rl && dj > l_ptr->cl) || (fi > l_ptr->rl && fj > l_ptr->cl) ) ) rflag = 1;	}	l_ptr->c_ss[l_ptr->m1].HH = l_ptr->CCC[l_ptr->n1].CC;	l_ptr->c_ss[l_ptr->m1].II = l_ptr->CCC[l_ptr->n1].RR;	l_ptr->c_ss[l_ptr->m1].JJ = l_ptr->CCC[l_ptr->n1].EE;	l_ptr->c_ss[l_ptr->m1].WW = f;	l_ptr->c_ss[l_ptr->m1].XX = fi;	l_ptr->c_ss[l_ptr->m1].YY = fj;	if ( ! cflag && ( (ci > l_ptr->rl && cj > l_ptr->cl) || (di > l_ptr->rl && dj > l_ptr->cl)  || (fi > l_ptr->rl && fj > l_ptr->cl ) )) cflag = 1;      }      if ( nseq == 1 && l_ptr->n1 == (l_ptr->m1 + 1) && ! rflag ) cflag = 0;      if ( cflag && l_ptr->n1 > 1 ) {	/* Compute one column */	cflag = 0;	l_ptr->n1--;	c = 0;	f = - (Q);	cj = fj = l_ptr->n1;	va = pam2[B[l_ptr->n1]];	if ( nseq == 2 || l_ptr->mm < l_ptr->n1 ) {	  p = 0;	  ci = fi = pi = l_ptr->mm + 1;	  pj = l_ptr->n1 + 1;	  limit = l_ptr->mm;	}	else {	  p = l_ptr->c_ss[l_ptr->n1].HH;	  pi = l_ptr->c_ss[l_ptr->n1].II;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -