⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dropfx.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 5 页
字号:
/*	if (maxv.start != dmax->start || maxv.stop != dmax->stop)		printf(" new region: %3d %3d %3d %3d\n",maxv.start,			dmax->start,maxv.stop,dmax->stop);*/   dmax->start = maxv.start;   dmax->stop = maxv.stop;   return maxv.score;}#define XFACT 10int sconn (struct savestr **v, int n,        int cgap, int pgap, struct f_struct *f_str){  int     i, si;  struct slink {    int     score;    struct savestr *vp;    struct slink *next;  }      *start, *sl, *sj, *so, sarr[MAXSAV];  int     lstart, tstart, plstop, ptstop;/*	sort the score left to right in lib pos */  kpsort (v, n);  start = NULL;/*	for the remaining runs, see if they fit */   for (i = 0, si = 0; i < n; i++)   {/*	if the score is less than the gap penalty, it never helps */      if (v[i]->score < cgap)	 continue;      lstart = v[i]->start;      tstart = lstart - v[i]->dp + f_str->noff;/*	put the run in the group */      sarr[si].vp = v[i];      sarr[si].score = v[i]->score;      sarr[si].next = NULL;/* 	if it fits, then increase the score */      for (sl = start; sl != NULL; sl = sl->next)      {	 plstop = sl->vp->stop;	 ptstop = plstop - sl->vp->dp + f_str->noff;	 if (plstop < lstart+XFACT && ptstop < tstart+XFACT) {	   sarr[si].score = sl->score + v[i]->score + pgap;	   break;	 }      }/*	now recalculate where the score fits */      if (start == NULL)	 start = &sarr[si];      else	 for (sj = start, so = NULL; sj != NULL; sj = sj->next)	 {	    if (sarr[si].score > sj->score)	    {	       sarr[si].next = sj;	       if (so != NULL)		  so->next = &sarr[si];	       else		  start = &sarr[si];	       break;	    }	    so = sj;	 }      si++;   }   if (start != NULL)      return (start->score);   else      return (0);}voidkssort (v, n)struct savestr *v[];int     n;{   int     gap, i, j;   struct savestr *tmp;   for (gap = n / 2; gap > 0; gap /= 2)      for (i = gap; i < n; i++)	 for (j = i - gap; j >= 0; j -= gap)	 {	    if (v[j]->score >= v[j + gap]->score)	       break;	    tmp = v[j];	    v[j] = v[j + gap];	    v[j + gap] = tmp;	 }}voidkpsort (v, n)struct savestr *v[];int     n;{   int     gap, i, j;   struct savestr *tmp;   for (gap = n / 2; gap > 0; gap /= 2)      for (i = gap; i < n; i++)	 for (j = i - gap; j >= 0; j -= gap)	 {	    if (v[j]->start <= v[j + gap]->start)	       break;	    tmp = v[j];	    v[j] = v[j + gap];	    v[j + gap] = tmp;	 }}static intdmatchx(const unsigned char *aa0, int n0,	const unsigned char *aa1, int n1,	int hoff, int window, 	int **pam2, int gdelval, int ggapval, int gshift,	struct f_struct *f_str){   hoff -= window/2;#ifndef TFAST   return lx_band(aa1,n1,f_str->aa0y,n0, 		  pam2,#ifdef OLD_FASTA_GAP		  -(gdelval-ggapval),#else		  -gdelval,#endif		  -ggapval,-gshift,		  hoff,window,f_str);#else   return lx_band(aa0,n0,f_str->aa1y,n1, 		  pam2,#ifdef OLD_FASTA_GAP		  -(gdelval-ggapval),#else		  -gdelval,#endif		  -ggapval,-gshift,		  hoff,window,f_str);#endif}static voidinit_row(struct sx_s *row, int sp) {  int i;  for (i = 0; i < sp; i++) {      row[i].C1 = row[i].I1 = 0;      row[i].C2 = row[i].I2 = 0;      row[i].C3 = row[i].I3 = 0;      row[i].flag = 0;  }}intlx_band(const unsigned char *prot_seq,  /* array with protein sequence numbers*/	int len_prot,    /* length of prot. seq */	const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/	int len_dna_prot,   /* length trans. seq. */	int **pam_matrix,   /* scoring matrix */	int gopen, int gext, /* gap open, gap extend penalties */	int gshift,         /* frame-shift penalty */	int start_diag,     /* start diagonal of band */	int width,         /* width for band alignment */	struct f_struct *f_str){  void *ckalloc();  int i, j, bd, bd1, x1, sp, p1=0, p2=0, end_prot;  int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;  register int *wt;  const unsigned char *dp;  register struct sx_s *ap, *aq;  sp = width+7;	  gg = gopen+gext;  /*  sp = sp/3; */  if (f_str->cur == NULL)     f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);  init_row(f_str->cur, sp);  /*  if (start_diag %3 !=0) start_diag = start_diag/3-1;  else start_diag = start_diag/3;  */  /*  if (width % 3 != 0) width = width/3+1;  else width = width /3;  */  /* currently, this code assumes that the DNA sequence is longer than the     protein sequence. This is not always true.  len_prot in the loop below     should be decreased to the projection of the DNA on the protein */  x1 = start_diag; 		/* x1 = lower bound of DNA */    end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width;  end_prot = min(end_prot,len_prot);  /* i counts through protein sequence, x1 through DNAp */  for (i = max(0, -width-start_diag), x1+=i; i < end_prot; i++, x1++) {      bd = min(x1+width, len_dna_prot/3);	/* upper bound of band */      bd1 = max(0,x1);	                /* lower bound of band */      wt = pam_matrix[prot_seq[i]];      del = 1-x1;   /*adjustment*/      bd += del;       bd1 +=del;      ap = &f_str->cur[bd1];      aq = ap+1;      e1 = f_str->cur[bd1-1].C3;      e2 = ap->C1;      cd1 = cd2= cd3= 0;      for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) {	  sc = max(max(e1, (e3=ap->C2))-gshift, e2)+wt[*dp++];	  if (cd1 > sc) sc = cd1;	  cd1 -= gext;	  if ((ci = aq->I1) > 0) {	      if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}	      else {		  ap->C1 = sc;		  sc -= gg;		  if (sc > 0) {		      if (sc > best) best =sc;		      if (cd1 < sc) cd1 = sc;		      ap->I1 = max(ci-gext, sc);		  } else ap->I1 = ci-gext;	      }	  } else {	      if (sc <= 0) {		  ap->I1 = ap->C1 = 0;	      } else {		  ap->C1 = sc; sc-=gg;		  if (sc >0) {		      if (sc > best) best =sc;		      if (cd1 < sc) cd1 = sc;		      ap->I1 = sc;		  } else ap->I1 = 0;	      }	  }	  sc = max(max(e2, (e1=ap->C3))-gshift, e3)+wt[*dp++];	  if (cd2 > sc) sc = cd2;	  cd2 -= gext;	  if ((ci = aq->I2) > 0) {	      if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}	      else {		  ap->C2 = sc;		  sc -= gg;		  if (sc > 0) {		      if (sc > best) best =sc;		      if (cd2 < sc) cd2 = sc;		      ap->I2 = max(ci-gext, sc);		  }	      }	  } else {	      if (sc <= 0) {		  ap->I2 = ap->C2 = 0;	      } else {		  ap->C2 = sc; sc-=gg;		  if (sc >0) {		      if (sc > best) best =sc;		      if (cd2 < sc) cd2 = sc;		      ap->I2 = sc;		  } else ap->I2 = 0;	      }	  }	  sc = max(max(e3, (e2=aq->C1))-gshift, e1)+wt[*dp++];	  if (cd3 > sc) sc = cd3;	  cd3 -= gext;	  if ((ci = aq++->I3) > 0) {	      if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}	      else {		  ap->C3 = sc;		  sc -= gg;		  if (sc > 0) {		      if (sc > best) best =sc;		      if (cd3 < sc) cd3 = sc;		      ap->I3 = max(ci-gext, sc);		  }	      }	  } else {	      if (sc <= 0) {		  ap->I3 = ap->C3 = 0;	      } else {		  ap->C3 = sc; sc-=gg;		  if (sc >0) {		      if (sc > best) best =sc;		      if (cd3 < sc) cd3 = sc;		      ap->I3 = sc;		  } else ap->I3 = 0;	      }	  }      }  }  /*  printf("The best score is %d\n", best); */  return best+gopen+gext;}/* ckalloc - allocate space; check for success */void *ckalloc(size_t amount){  void *p;  if ((p = (void *)malloc( (size_t)amount)) == NULL)    w_abort("Ran out of memory.","");  return(p);}/* calculate the 100% identical score */intshscore(unsigned char *aa0, int n0, int **pam2){  int i, sum;  for (i=0,sum=0; i<n0; i++)    sum += pam2[aa0[i]][aa0[i]];  return sum;}#define SGW1 100#define SGW2 300#define WIDTH 60/* code above is to convert sequence into numbers */typedef struct mat *match_ptr;typedef struct mat {	int i, j, l;	match_ptr next;} match_node;typedef struct {	int i,j;} state;typedef state *state_ptr;typedef struct st_s { int C, I, D;} *st_ptr;/* static st_ptr up=NULL, down, tp; *//* static int *st_up; *//* static int gop, gext, shift; */void *ckalloc(size_t);static match_ptr small_global(), global();static int local_align(), find_best();static void init_row2(),  init_ROW();intpro_dna(const unsigned char *prot_seq,	/* array with prot. seq. numbers*/	int len_prot,	 		/* length of prot. seq */	const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/	int len_dna_prot,		/* length trans. seq. */	int **pam_matrix,		/* scoring matrix */	int gopen, int gex,		/* gap open, gap extend penalties */	int gshift,        		/* frame-shift penalty */	int max_res,	struct a_res_str *a_res)	/* alignment info */{  match_ptr align, ap, aq;  int x, y, ex, ey, i, score;  int *alignment;  st_ptr up, down, tp;  /* these globals removed */  /*   gext = gex; gop = gopen; shift = gshift; */  /* for fastx (but not tfastx), these could be moved into init_work(),     and done only once */  up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));  down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));  tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));  /*local alignment find the best local alignment x (prot) and y (DNA)    is the starting position of the best local alignment    and ex (prot) ey (DNA) is the ending position */  score= local_align(&x, &y, &ex, &ey, pam_matrix,		     gopen, gex, gshift,		     dna_prot_seq, len_dna_prot,		     prot_seq, len_prot, up, down);  /* this is very strange, since local_align initialized up, down */  up += 3; down += 3; tp += 3;  /* x, y - start in prot, dna_prot */  a_res->min0 = x;	/* prot */  a_res->max0 = ex;	/* prot */  a_res->min1 = y;	/* DNA-prot */  a_res->max1 = ey;	/* DNA-prot */  align = global(x, y, ex, ey, pam_matrix, gopen, gex, gshift, 		 dna_prot_seq, prot_seq, 0, 0, &up, &down, &tp);  alignment = a_res->res;  /* from earlier version */  /* alignment[0] = x; */ /* start of alignment in prot */  /* alignment[1] = y; */ /* start of alignment in DNA */  for (ap = align, i= 0; ap; i++) {    if (i < max_res) {alignment[i] = ap->l;}    aq = ap->next; free(ap); ap = aq;  }  if (i >= max_res) {    fprintf(stderr," alignment truncated: %d/%d\n", max_res,i);  }  up = &up[-3]; down = &down[-3]; tp = &tp[-3];  free(up); free(tp); free(down);  /* free(st_up); */ /*  moved into local align */  a_res->nres = i;	/* i has the length of the alignment */  return score;}static voidswap(void **a, void **b) {  void *t;  t = *a;  *a = *b;  *b = t;}/*   local alignment find the best local alignment x and y   is the starting position of the best local alignment   and ex ey is the ending position */static intlocal_align(int *x, int *y, int *ex, int *ey,	    int **wgts, int gop, int gext, int shift,	    unsigned char *dnap, int ld,	    unsigned char *pro,  int lp,	    st_ptr up, st_ptr down) {  int i, j,  score, x1,x2,x3,x4, e1, e2 = 0, e3,    sc, del,  e, best = 0, *wt, cd, ci;  state_ptr cur_st, last_st, cur_i_st;  st_ptr cur, last;  unsigned char *dp;  int *st_up, *cur_d_st;/*         Array rowiC store the best scores of alignment ending at a position   Arrays rowiD, and rowiI store the best scores of alignment ending                 at a position with a deletion or insrtion   Arrays sti stores the starting position of the best alignment whose              score stored in the corresponding row array.   The program stores two rows to complete the computation, same is        for the global alignment routine.*/  /* for fastx (but not tfastx), this could be moved into init_work(),     and done only once */  st_up = (int *) ckalloc(sizeof(int)*(ld+10));  init_row2(st_up, ld+5);  ld += 2;  init_ROW(up, ld+1);	/* set to zero */  init_ROW(down, ld+1);	/* set to zero */  cur = up+1;  last = down+1;   /* for fastx (but not tfastx), these could be moved into init_work(),     and done only once */  cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));  last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));  cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));  cur_d_st = st_up;   dp = dnap-2;  for (i = 0; i < lp; i++) {    wt = &wgts[pro[i]][0];    for (j = 0; j < 2; j++) {      cur_st[j].i = i+1;      cur_st[j].j = j+1;    }    for (j = 2; j < ld; j++) {      score = wt[dp[j]];      del = -1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -