⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dropfz2.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 5 页
字号:
    }  }  *m1 = j-1+y;  *m2 = j+y;  /*printf("score=%d\n", best);*/  return st;} /*   An alignment is represented as a linked list whose element   is of type match_node. Each element represent an edge in the   path of the alignment graph. The fields of match_node are   l ---  gives the type of the edge.   i, j --- give the end position.*/static match_ptrsmall_global(int x, int y, int ex, int ey, 	     int **wgts, int gop, int gext,	     unsigned char *dnap, unsigned char *pro,	     int N1, int N2, struct f_struct *f_str) {  static int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1];  int i, j, e, sc, score, del, k, t,  ci, cd;  int *cI, *cD, *cC, *lC, *cst, e2, e3, e4;  match_ptr mp, first;  struct wgt *wt, *ww;  /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/  sc = -gop-gext; C[0][0] = 0;   if (N1) I[0] = -gext; else I[0] = sc;  for (j = 1; j <= ey-y+1; j++) {    if (j % 3== 0) {      C[0][j] = sc; sc -= gext; I[j] = sc-gop;    } else I[j] = C[0][j] = -10000;    st[0][j] = 5;  }  lC = &C[0][0]; cD = D; D[0] = D[1] = D[2] = -10000;  cI = I;  for (i = 1; i <= ex-x+1; i++) {    cC = &C[i][0];	    wt = f_str->weight1[pro[i+x-1]]; cst = &st[i][0];    for (j = 0; j <=ey-y+1; j++) {      ci = cI[j];      cd= cD[j];      t = j+y;      ww = &wt[(unsigned char) dnap[t-3]];      if (j >= 4) {	sc = lC[j-3]+ww->iii; e2 = lC[j-2]+ww->ii;  	e4 = lC[j-4]+ww->iv; del = 3;	if (e2 > sc) { sc = e2; del = 2;}	if (e4 >= sc) { sc = e4; del = 4;}      } else {	if (j == 3) {	  sc = lC[0]+ww->iii; del =3;	} else if (j == 2) {	  sc = lC[0]+ww->ii; del = 2;	} else {sc = -10000; del = 0;}      }      if (sc < ci) {	sc = ci; del = 0;       }      if (sc <= cd) {	sc = cd;	del = 5;      }      cC[j] = sc;      sc -= gop;      if (sc <= cd) {	del += 10;	cD[j+3] = cd - gext;      } else cD[j+3] = sc -gext;      if (sc < ci) {	del += 20;	cI[j] = ci-gext;      } else cI[j] = sc-gext;      *(cst++) = del;    }    lC = cC;  }  /*printf("small global score =%d\n", C[ex-x+1][ey-y+1]);*/  if (N2 && cC[ey-y+1] <  ci+gop) st[ex-x+1][ey-y+1] =0;  first = NULL; e = 1;  for (i = ex+1, j = ey+1; i > x || j > y; i--) {    mp = (match_ptr) ckalloc(sizeof(match_node));    mp->i = i-1;    k  = (t=st[i-x][j-y])%10;    mp->j = j-1;    if (e == 5 && (t/10)%2 == 1) k = 5;    if (e == 0 && (t/20)== 1) k = 0;    if (k == 5) { j -= 3; i++; e=5;}    else {j -= k;if (k==0) e= 0; else e = 1;}    mp->l = k;    mp->next = first;    first = mp;  }  /*	for (i = 0; i <= ex-x; i++) {	for (j = 0; j <= ey-y; j++) 	printf("%d ", C[i][j]);	printf("\n");	}  */  return first;	}#define XTERNAL#include "upam.h"voiddisplay_alig(int *a, unsigned char *dna, unsigned char *pro,	     int length, int ld, struct f_struct *f_str){  int len = 0, i, j, x, y, lines, k, iaa;  static char line1[100], line2[100], line3[100],    tmp[10] = "         ", *st;  char *dna1, c1, c2, c3;  line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-3;  printf("\n%5d\n%5d", y+3, x);  for (len = 0, j = 2, lines = 0; j < length; j++) {    i = a[j];    line3[len] = ' ';    switch (i) {    case 3:       y += 3;      line2[len] = aa[iaa=pro[x++]];      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c5;      if (line1[len] != f_str->weight_c[iaa][(unsigned char) dna[y]].c3)	line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;      break;    case 2:      y += 2;      line1[len] = '\\';      line2[len++] = ' ';      line2[len] = aa[iaa=pro[x++]];      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c2;      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;      break;    case 4:      y += 4;      line1[len] = '/';      line2[len++] = ' ';      line2[len] = aa[iaa=pro[x++]];      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c4;      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;      break;    case 5:      y += 3;      line1[len] = f_str->weight_c[0][(unsigned char) dna[y]].c3;      line2[len] = '-';      break;    case 0:      line1[len] = '-';      line2[len] = aa[pro[x++]];      break;    }    len++;    line1[len] = line2[len]  = line3[len]  = '\0';     if (len >= WIDTH) {      for (k = 10; k <= WIDTH; k+=10) 	printf("    .    :");      if (k-5 < WIDTH) printf("    .");      c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];      line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';      printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);      line1[WIDTH] = c1; line2[WIDTH] = c2;      strncpy(line1, &line1[WIDTH], sizeof(line1)-1);      strncpy(line2, &line2[WIDTH], sizeof(line2)-1);      strncpy(line3, &line3[WIDTH], sizeof(line3)-1);      len = len - WIDTH;      printf("\n%5d\n%5d", y+3, x);    }  }  for (k = 10; k < len; k+=10)     printf("    .    :");  if (k-5 < len) printf("    .");  printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);}/* alignment store the operation that align the protein and dna sequence.   The code of the number in the array is as follows:   0:     delete of an amino acid.   2:     frame shift, 2 nucleotides match with an amino acid   3:     match an  amino acid with a codon   4:     the other type of frame shift   5:     delete of a codon      Also the first two element of the array stores the starting point    in the protein and dna sequences in the local alignment.   Display looks like where WIDTH is assumed to be divisible by 10.    0    .    :    .    :    .    :    .    :    .    :    .    :     AACE/N\PLK\G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ          I S   G S  V F   N R Q L A     G S  V F   N R Q L A         AACE P P-- G HK Y TWA A C E P P---- G HK Y TWA A C E P P----   60    .    :    .    :    .    :    .    :    .    :    .    :     /G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LW      G S  V F   N R Q L A     G S  V F   N R Q L A     G S  V F       G HK Y TWA A C E P P---- G HK Y TWA A C E P P---- G HK Y TWFor frame shift, the middle row show the letter in the original sequence,and the letter in the top row is the amino acid that is chose by the alignment (translated codon chosen from 4 nucleotides, or 2+1).*//* fatal - print message and die */voidfatal(msg)     char *msg;{  fprintf(stderr, "%s\n", msg);  exit(1);}voiddo_walign (const unsigned char *aa0, int n0,	   const unsigned char *aa1, int n1,	   int frame,	   struct pstruct *ppst, 	   struct f_struct *f_str, 	   struct a_res_str *a_res,	   int *have_ares){  int score;  int i, ir, last_n1, itemp, n10, itx, dnav;  unsigned char *aa1x;    a_res->res = f_str->res;	/* pro_dna needs a valid res at start */  *have_ares = 1;	/* pre_cons() required before calcons() */#ifndef TFAST  score = pro_dna(aa1, n1, f_str->aa0v, n0-2, ppst->pam2[0],#ifdef OLD_FASTA_GAP		  -(ppst->gdelval - ppst->ggapval),#else		  -ppst->gdelval,#endif		  -ppst->ggapval,		  -ppst->gshift,		  f_str, f_str->max_res, a_res);  /* display_alig(f_str->res,f_str->aa0v+2,aa1,*nres,n0-2,f_str); */  #else  /* make a precomputed codon number series */  if (frame==0) {    pre_com(aa1, n1, f_str->aa1v);  }  else { /* must do things backwards */    pre_com_r(aa1, n1, f_str->aa1v);  }  /* make translated sequence */  last_n1 = 0;  aa1x = f_str->aa1x;  for (itx= frame*3; itx< frame*3+3; itx++) {    n10  = saatran(aa1,&aa1x[last_n1],n1,itx);    /*      fprintf(stderr," itt %d itx: %d\n",itt,itx);      for (i=0; i<n10; i++) {      fprintf(stderr,"%c",aa[f_str->aa1x[last_n1+i]]);      if ((i%60)==59) fprintf(stderr,"\n");      }      fprintf(stderr,"\n");    */    last_n1 += n10+1;  }  n10 = last_n1-1;  score = pro_dna(aa0, n0, f_str->aa1v, n1-2, ppst->pam2[0],#ifdef OLD_FASTA_GAP		  -(ppst->gdelval - ppst->ggapval),#else		  -ppst->gdelval,#endif		  -ppst->ggapval,		  -ppst->gshift,		  f_str, f_str->max_res, a_res);  /* display_alig(f_str->res,f_str->aa0y,aa1,*nres,n0,f_str); */#endif  a_res->res = f_str->res;  *have_ares = 1;  a_res->sw_score = score;}voidpre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {#ifdef TFAST  int i, last_n1, itemp, n10;  unsigned char *fs, *fd;  int itx;  /* make a precomputed codon number series */  if (frame==0) {    pre_com(aa1, n1, f_str->aa1v);  }  else { /* must do things backwards */    pre_com_r(aa1, n1, f_str->aa1v);  }#endif}/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev *//* call from calcons, calc_id, calc_code */void aln_func_vals(int frame, struct a_struct *aln) {#ifndef TFAST  aln->llrev = 0;  aln->llfact = 1;  aln->llmult = 1;  aln->qlfact = 3;  aln->frame = 0;  if (frame > 0) aln->qlrev = 1;  else aln->qlrev = 0;#else	/* TFASTX */  aln->qlfact = 1;  aln->qlrev = 0;  aln->llfact = 3;  aln->llmult = 1;  aln->frame = 0;  if (frame > 0) aln->llrev = 1;  else aln->llrev = 0;#endif	/* TFASTX */}#include "structs.h"#include "a_mark.h"intcalcons_a(const unsigned char *aa0, int n0,	  const unsigned char *aa1, int n1,	  int *nc,	  struct a_struct *aln,	  struct a_res_str *a_res,	  struct pstruct *ppst,	  char *seqc0, char *seqc1, char *seqca,	  unsigned char *aa0a, char *seqc0a, char *ann_arr,	  struct f_struct *f_str){  int i0, i1;  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;  char *sp0, *sp0a, *sp1, *spa, *sq;  unsigned char aap;  const unsigned char *ap0, *ap1;  int *rp, *rpmax;  int have_ann = 0;    have_ann = (ann_arr[0]!='\0' && aa0a != NULL);  /* don't fill in the ends */  rpmax = &a_res->res[a_res->nres];		/* end of alignment info */  if (ppst->ext_sq_set) {sq = ppst->sqx;}  else {sq = ppst->sq;}  /* res[0] has start of protein sequence */  /* res[1] has start of translated DNA sequence */#ifndef TFAST  ap0 = f_str->aa0v;		/* computed codons -> ap0*/  ap1 = aa1;			/* protein sequence -> ap1 */  aln->smin1 = a_res->min0;	/* start in protein sequence */  aln->smin0= a_res->min1;		/* start in DNA/codon sequence */#else	/* TFASTYZ */  ap0 = f_str->aa1v;		/* computed codons -> ap0*/  ap1 = aa0;			/* protein sequence */  aln->smin0 = a_res->min0;	/* start in protein sequence */  aln->smin1 = a_res->min1;		/* start in codon sequence */#endif  rp = a_res->res;			/* start of alignment info *//* now get the middle */  spa = seqca;  sp0a = seqc0a;#ifndef TFAST  sp0 = seqc0;		/* sp0/seqc0 is codon sequence */  sp1 = seqc1;		/* sp1/seqc1 is protein sequence */#else  sp1 = seqc0;		/* sp1/seqc0 is protein sequence */  sp0 = seqc1;		/* sp0/seqc1 is codon sequence */#endif  lenc = not_c = aln->nident = aln->nsim = ngap_d = ngap_p = nfs = 0;  i0 = a_res->min1-3;	/* start of codon sequence */  i1 = a_res->min0;	/* start of protein sequence */  while (rp < rpmax ) {    switch (*rp++) {    case 3:		/* match */      i0 += 3;      *sp0a++ = ' ';      *sp1 = sq[aap=ap1[i1++]];      *sp0 = f_str->weight_c[aap][ap0[i0]].c5;      if ((itmp=ppst->pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }      else if (itmp == 0) { *spa = M_ZERO;}      else {*spa = M_POS;}      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}      sp0++; sp1++; spa++;      lenc++;      break;    case 2:		/* frame shift +2, then match */      nfs++;      i0 += 2;      *sp0a++ = ' ';      *sp0++ = '/';      *sp1++ = '-';      *spa++ = M_DEL;      not_c++;#ifndef TFAST      *sp0a++ = ' ';#else      if (have_ann) {*sp0a++ = ann_arr[aa0a[i1]];}      else {*sp0a++ = ' ';}#endif      *sp1 = sq[aap=ap1[i1++]];      *sp0 = f_str->weight_c[aap][ap0[i0]].c2;      if ((itmp=ppst->pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }      else if (itmp == 0) { *spa = M_ZERO;}      else {*spa = M_POS;}      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}      sp0++; sp1++; spa++;      lenc++;      break;    case 4:		/* frame shift, -1, then match */      nfs++;      i0 += 4;#ifndef TFAST      *sp0a++ = ' ';#else      if (have_ann) {*sp0a++ = ann_arr[aa0a[i1]];}      else {*sp0a++ = ' ';}#endif      *sp0++ = '\\';      *sp1++ = '-';      *spa++ = M_DEL;      not_c++;      *sp1 = sq[aap=ap1[i1++]];      *sp0 = f_str->weight_c[aap][ap0[i0]].c4;      if ((itmp=ppst->pam2[0][aap][pascii[*sp0]])<0) { *spa = M_NEG; }      else if (itmp == 0) { *spa = M_ZERO;}      else {*spa = M_POS;}      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}      sp0++; sp1++; spa++;      lenc++;      break;    case 5:		/* insertion in 1 */      i0 += 3;      *sp0++ = f_str->weight_c[0][ap0[i0]].c3;      *sp1++ = '-';      *spa++ = M_DEL;      *sp0a++ = ' ';      lenc++;      ngap_p++;      break;    case 0:		/* insertion in 0 */      *sp0++ = '-';#ifndef TFAST      *sp0a++ = ' ';#else      if (have_ann) {*sp0a++ = ann_arr[aa0a[i1]];}      else {*sp0a++ = ' ';}#endif      *sp1++ = sq[ap1[i1++]];      *spa++ = M_DEL;      lenc++;      ngap_d++;      break;    }  }  *sp0a = *spa = '\0';#ifndef TFAST

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -