⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dropfz2.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 5 页
字号:
	  }      }  }  /*  printf("The best score is %d\n", best); */  return best+gg;}/* ckalloc - allocate space; check for success */void *ckalloc(size_t amount){	void *p;	if ((p = (void *)malloc( (size_t)amount)) == NULL)		w_abort("Ran out of memory.","");	return(p);}/* calculate the 100% identical score */intshscore(unsigned char *aa0, int n0, int **pam2){  int i, sum;  for (i=0,sum=0; i<n0; i++)    sum += pam2[aa0[i]][aa0[i]];  return sum;}#define SGW1 100#define SGW2 300#define WIDTH 60typedef struct mat *match_ptr;typedef struct mat {	int i, j, l;	match_ptr next;} match_node;typedef struct { int i,j;} state;typedef state *state_ptr;void *ckalloc();static match_ptr small_global(), global();static int local_align(), find_best();static void init_row2(), init_ROW();intpro_dna(const unsigned char *prot_seq,  /* array with prot. seq. numbers*/	int len_prot,    /* length of prot. seq */	const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/	int len_dna_prot,   /* length trans. seq. */	int **pam_matrix,   /* scoring matrix */	int gopen, int gext, /* gap open, gap extend penalties */	int gshift,         /* frame-shift penalty */	struct f_struct *f_str,	int max_res,	struct a_res_str *a_res) /* alignment info */{  match_ptr align, ap, aq;  int x, y, ex, ey, i, score;  int *alignment;  f_str->up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));  f_str->down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));  f_str->tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));  /*local alignment find the best local alignment x and y    is the starting position of the best local alignment    and ex ey is the ending position */  score= local_align(&x, &y, &ex, &ey, 		     pam_matrix, gopen, gext,		     dna_prot_seq, len_dna_prot,		     prot_seq, len_prot, f_str);  f_str->up += 3; f_str->down += 3; f_str->tp += 3;  /* x, y - start in prot, dna_prot */  a_res->min0 = x;	/* prot */  a_res->min1 = y;	/* DNA */  a_res->max0 = ex;	/* prot */  a_res->max1 = ey;	/* DNA */  align = global(x, y, ex, ey, 		 pam_matrix, gopen, gext,		 dna_prot_seq, prot_seq,		 0, 0, f_str);  alignment = a_res->res;  for (ap = align, i= 0; ap; i++) {    if (i < max_res) alignment[i] = ap->l;    aq = ap->next; free(ap); ap = aq;  }  if (i >= max_res)    fprintf(stderr,"***alignment truncated: %d/%d***\n", max_res,i);  /*   up = &up[-3]; down = &down[-3]; tp = &tp[-3]; */  free(&f_str->up[-3]); free(&f_str->tp[-3]); free(&f_str->down[-3]);  a_res->nres = i;  return score;}static voidswap(void **a, void **b){    void *t = *a;    *a = *b;   *b = t;}/*   local alignment find the best local alignment x and y   is the starting position of the best local alignment   and ex ey is the ending position */static intlocal_align(int *x, int *y, int *ex, int *ey, 	    int **wgts, int gop, int gext,	    const unsigned char *dnap, int ld,	    const unsigned char *pro, int lp,	    struct f_struct *f_str){  int i, j,  score, x1,x2,x3,x4, e1 = 0, e2 = 0, e3,    sc, del,  e, best = 0,  cd, ci, c;  struct wgt *wt, *ww;  state_ptr cur_st, last_st, cur_i_st;  st_ptr cur, last;  const unsigned char *dp;  int *cur_d_st, *st_up;  /*      	  Array rowiC stores the best scores of alignment ending at a position	  Arrays rowiD and rowiI store the best scores of alignment ending	  at a position with a deletion or insrtion	  Arrays sti stores the starting position of the best alignment whose	  score stored in the corresponding row array.	  The program stores two rows to complete the computation, same is	  for the global alignment routine.  */  st_up = (int *) ckalloc(sizeof(int)*(ld+10));  init_row2(st_up, ld+5);  ld += 2;  init_ROW(f_str->up, ld+1);  init_ROW(f_str->down, ld+1);  cur = f_str->up+1;  last = f_str->down+1;   cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));  last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));  cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));  cur_d_st = st_up;   dp = dnap-2;  for (i = 0; i < lp; i++) {    wt = f_str->weight1[pro[i]];  e2 =0; e1 = last[0].C;    for (j = 0; j < 2; j++) {      cur_st[j].i = i+1;      cur_st[j].j = j+1;    }    for (j = 2; j < ld; j++) {      ww = &wt[(unsigned char) dp[j]];      del = -1;      if (j >= 3) {	sc = 0;	e3 = e2; e2 = e1;	e1 = last[j-2].C; 	if ((e=e2+ww->iii) > sc) {sc = e; del = 3;}	if ((e=e1+ww->ii) > sc) {sc = e; del = 2;}	if ((e = e3+ww->iv) > sc) {sc = e; del = 4;}       } else {	sc = e2  = 0;	if (ww->iii > 0) {sc = ww->iii; del = 3;}      }      if (sc < (ci=last[j].I)) {	sc = ci; del = 0;      }      if (sc < (cd=cur[j].D)) {	sc = cd; del = 5;      }      cur[j].C = sc;      e = sc  - gop;      if (e > cd) {	cur[j+3].D = e-gext;	cur_d_st[j+3] = 3;      } else {	cur[j+3].D = cd-gext;	cur_d_st[j+3] = cur_d_st[j]+3;      }      switch(del) {      case 5:	c = cur_d_st[j];	cur_st[j].i = cur_st[j-c].i;	cur_st[j].j = cur_st[j-c].j;	break;      case 0:	cur_st[j].i = cur_i_st[j].i;	cur_st[j].j = cur_i_st[j].j;	break;      case 2:      case 3:      case 4:	if (i) {	  if (j-del >= 0) {	    cur_st[j].i = last_st[j-del].i;	    cur_st[j].j = last_st[j-del].j;	  } else {	    cur_st[j].i = i;	    cur_st[j].j = 0;	  }	} else {	  cur_st[j].i = 0;	  cur_st[j].j = max(0, j-del+1);	}	break;      case -1:	cur_st[j].i = i+1;	cur_st[j].j = j+1;	break;      }      if (e > ci) {	cur[j].I  = e -gext;	cur_i_st[j].i = cur_st[j].i;	cur_i_st[j].j = cur_st[j].j;      } else {	cur[j].I  = ci- gext;      }      if (sc > best) {	x1 = cur_st[j].i;	x2 = cur_st[j].j;	best =sc;	x3 = i;	x4 = j;      }    }    swap((void *)&last, (void *)&cur);    swap((void *)&cur_st, (void *)&last_st);  }  /*	printf("The best score is %d\n", best);*/  *x = x1; *y = x2; *ex = x3; *ey = x4;  free(cur_st); free(last_st); free(cur_i_st);   free(st_up);  return best;}/*    Both global_up and global_down do linear space score only global    alignments on subsequence pro[x]...pro[ex], and dna[y]...dna[ey].   global_up do the algorithm upwards, from row x towards row y.   global_down do the algorithm downwards, from row y towards x.*/static voidglobal_up(st_ptr *row1, st_ptr *row2, 	  int x, int y, int ex, int ey, 	  int **wgts, int gop, int gext,	  unsigned char *dnap, unsigned char *pro, 	  int N, struct f_struct *f_str){  int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score;  struct wgt *wt, *ww;  st_ptr cur, last;  cur = *row1; last = *row2;  sc = -gop;  for (j = 0; j <= ey-y+1; j++) {    if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}    else { last[j].I = last[j].C = -10000;}  }    last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;  last[0].D = last[1].D = last[2].D = -10000;  if (N) last[0].I = -gext;  for (i = 1; i <= ex-x+1; i++) {    wt = f_str->weight1[pro[i+x-1]]; e1 = -10000; e2 = last[0].C;    for (j = 0; j <= ey-y+1; j++) {      t = j+y;      sc = -10000;       ww = &wt[(unsigned char) dnap[t-3]];       if (j < 4) {	if (j == 3) {	  sc = e2+ww->iii;	} else if (j == 2) {	  sc = e2 + ww->ii;	}      } else {	e3 = e2; e2 = e1;	e1 = last[j-2].C;	sc = max(e2+ww->iii, max(e1+ww->ii, e3+ww->iv));      }      sc = max(sc, max(ci=last[j].I, cd = cur[j].D));      cur[j].C = sc;      cur[j+3].D = max(cd, sc-gop)-gext;      cur[j].I = max(ci, sc-gop)-gext;    }    swap((void *)&last, (void *)&cur);  }  /*printf("global up score =%d\n", last[ey-y+1].C);*/  for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;	  if (*row1 != last) swap((void *)row1, (void *)row2);}static voidglobal_down(st_ptr *row1, st_ptr *row2,	    int x, int y, int ex, int ey, 	    int **wgts, int gop, int gext,	    unsigned char *dnap, unsigned char *pro,	    int N, struct f_struct *f_str){  int i, j, k, sc, del, *tmp, e,  t, e1,e2,e3, ci,cd, score;  struct wgt *wt, *w1, *w2, *w3;  st_ptr cur, last;  cur = (*row1); last = *row2;  sc = -gop;  for (j = ey-y+1; j >= 0; j--) {    if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;}    else  last[j].I =  last[j].C = -10000;    cur[j].I = -10000;  }   last[ey-y+1].C = 0;  if (N) last[ey-y+1].I = -gext;  cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;  last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;  for (i = ex-x; i >= 0; i--) {    wt = f_str->weight1[pro[i+x]]; e2 = last[ey-y+1].C;     e1 = -10000;    w3 = &wt[(unsigned char) dnap[ey]];     w2 = &wt[(unsigned char) dnap[ey-1]];    for (j = ey-y+1; j >= 0; j--) {      t = j+y;      w1 = &wt[(unsigned char) dnap[t-1]];      sc = -10000;      if (t+3 > ey) {	if (t+2 == ey) {	  sc = e2+w2->iii; 	} else if (t+1 == ey) {	  sc = e2+w1->ii;	}      } else {	e3 = e2; e2 = e1;	e1 = last[j+2].C;	sc = max(e2+w2->iii, max(e1+w1->ii,e3+w3->iv)) ;      }      if (sc < (cd= cur[j].D)) {	sc = cd; 	cur[j-3].D = cd-gext;      } else cur[j-3].D =max(cd, sc-gop)-gext;      if (sc < (ci= last[j].I)) {	sc = ci;	cur[j].I = ci - gext;      } else cur[j].I = max(sc-gop,ci)-gext;      cur[j].C = sc;      w3 = w2; w2 = w1;    }    swap((void *)&last, (void *)&cur);  }  for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;  if (*row1 != last) swap((void *)row1, (void *)row2);}static voidinit_row2(int *row, int ld) {  int i;  for (i = 0; i < ld; i++) row[i] = 0;}static void init_ROW(st_ptr row, int ld) {  int i;  for (i = 0; i < ld; i++) row[i].I = row[i].D = row[i].C = 0;}static match_ptrcombine(match_ptr x1, match_ptr x2, int st) {  match_ptr x;  if (x1 == NULL) return x2;  for (x = x1; x->next; x = x->next);  x->next = x2;  if (st) {    for (x = x2; x; x = x->next) {      x->j++;      if (x->l == 3 || x->l == 4) break;    }    x->l--;  }  return x1;}/*   global use the two upwards and downwards score only linear   space global alignment subroutine to recursively build the   alignment.*/match_ptrglobal(int x, int y, int ex, int ey,       int **wgts, int gop, int gext,       unsigned char *dnap, unsigned char *pro, int N1, int N2,       struct f_struct *f_str){  int m;  int m1, m2;  match_ptr x1, x2, mm1, mm2;  /*printf("%d %d %d %d %d %d\n", x,y, ex, ey, N1, N2);*/  /*    if the space required is limited, we can do a quadratic space    algorithm to find the alignment.  */  if (ex <= x) {    mm1  = NULL;    for (m = y+3; m <= ey; m+=3) {      x1 = (match_ptr) ckalloc(sizeof(match_node));      x1->l = 5; x1->next = mm1;       if (mm1== NULL) mm2 = x1;      mm1 = x1;    }    if (ex == x) {      if ((ey-y) % 3 != 0) {	x1  = (match_ptr) ckalloc(sizeof(match_node));	x1->l = ((ey-y) % 3) +1; x1->next = NULL;	if (mm1) mm2->next = x1; else mm1 = x1;      } else mm2->l = 4;    }    return mm1;  }  if (ey <= y) {    mm1  = NULL;    for (m = x; m <= ex; m++) {      x1 = (match_ptr) ckalloc(sizeof(match_node));      x1->l = 0; x1->next = mm1; mm1 = x1;    }    return mm1;  }  if (ex -x < SGW1 && ey-y < SGW2)     return small_global(x,y,ex,ey,wgts, gop, gext,  dnap, pro, N1, N2,f_str);  m = (x+ex)/2;  /*     	 Do the score only global alignment from row x to row m, m is	 the middle row of x and ex. Store the information of row m in	 upC, upD, and upI.  */  global_up(&f_str->up, &f_str->tp,  x, y, m, ey,	    wgts, gop, gext,	    dnap, pro, N1, f_str);  /*      Do the score only global alignment downwards from row ex     to row m+1, store information of row m+1 in downC downI and downD  */  global_down(&f_str->down, &f_str->tp, m+1, y, ex, ey,	      wgts, gop, gext,	      dnap, pro, N2, f_str);  /*    Use this information for row m and m+1 to find the crossing    point of the best alignment with the middle row. The crossing    point is given by m1 and m2. Then we recursively call global    itself to compute alignments in two smaller regions found by    the crossing point and combine the two alignments to form a    whole alignment. Return that alignment.  */  if (find_best(f_str->up, f_str->down, &m1, &m2, ey-y+1, y, gop)) {    x1 = global(x, y, m, m1, wgts, gop, gext, dnap, pro, N1, 0, f_str);    x2 = global(m+1, m2, ex, ey, wgts, gop, gext, dnap, pro, 0, N2, f_str);    if (m1 == m2) x1 = combine(x1,x2,1);    else x1 = combine(x1, x2,0);  } else {    x1 = global(x, y, m-1, m1, wgts, gop, gext, dnap, pro, N1, 1, f_str);    x2 = global(m+2, m2, ex, ey, wgts, gop, gext, dnap, pro, 1, N2, f_str);    mm1 = (match_ptr) ckalloc(sizeof(match_node));    mm1->i = m; mm1->l = 0; mm1->j = m1;    mm2 = (match_ptr) ckalloc(sizeof(match_node));    mm2->i = m+1; mm2->l = 0; mm2->j = m1;    mm1->next = mm2; mm2->next = x2;    x1 = combine(x1, mm1, 0);  }  return x1;}static intfind_best(st_ptr up, st_ptr down, int *m1, int *m2, int ld, int y, int gop) {  int i, best = -1000, j = 0, s1, s2, s3, s4, st;  for (i = 1; i < ld; i++) {    s2 = up[i].C + down[i].C;    s4 = up[i].I + down[i].I + gop;    if (best < s2) {      best = s2; j = i; st = 1;    }    if (best < s4) {      best = s4; j = i; st = 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -