⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dropfx.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 5 页
字号:
/* alignment store the operation that align the protein and dna sequence.   The code of the number in the array is as follows:   0:     delete of an amino acid.   2:     frame shift, 2 nucleotides match with an amino acid   3:     match an  amino acid with a codon   4:     the other type of frame shift   5:     delete of a codon      Also the first two element of the array stores the starting point    in the protein and dna sequences in the local alignment.   Display looks like where WIDTH is assumed to be divisible by 10.    0    .    :    .    :    .    :    .    :    .    :    .    :     CCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTG      P  M  I  L  G  Y  W  N  V  R  G  L  T  H  P  I  R  M  L  L    60    .    :    .    :    .    :    .    :    .    :    .    :     GAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTT      E  Y  T  D  S  S  Y  D  E  K  R  Y  T  M  G  D  A  P  D  F *//* fatal - print message and die */void fatal(msg)char *msg;{	fprintf(stderr, "%s\n", msg);	exit(1);}voiddo_walign (const unsigned char *aa0, int n0,	   const unsigned char *aa1, int n1,	   int frame,	   struct pstruct *ppst, 	   struct f_struct *f_str, 	   struct a_res_str *a_res,	   int *have_ares){  int score;  int i, last_n1, itemp, n10;  int n_aa, n_nt, hoff, nt_min, nt_max, w_fact;  unsigned char *fs, *fd;  struct rstruct rst;  int itx;    a_res->res = f_str->res;	/* pro_dna needs a valid res at start */  *have_ares = 1;	/* pre_cons() required for TFAST */#ifndef TFAST	/* FASTX */  n_aa = n1;  n_nt = n0;  /*  check for large differences in sequence length */  nt_min = 0; nt_max = n_nt;  if (n_nt > 6 * n_aa) {    /* find out where the diagonal is - get hoff       hoff < 0 => seq0 is in the middle of seq1     */    do_fastx(f_str->aa0x, n0, aa1, n1, ppst, f_str, &rst, &hoff);    if (rst.score[0] > 2 * rst.score[2]) {w_fact = 4;}     else w_fact = 2;    if (hoff > n_aa) { /* hoff > 0 => seq1 is in the middle of seq0 */      nt_min = max(0,(hoff-w_fact*n_aa)*3);      nt_max = min((hoff+w_fact*n_aa)*3,n_nt);    }    else {      nt_max = min(3*w_fact*n_aa,n_nt);    }  }  score = pro_dna(aa1, n1, f_str->aa0y+nt_min, nt_max-nt_min, ppst->pam2[0],#ifdef OLD_FASTA_GAP		  -(ppst->gdelval - ppst->ggapval),#else		  -ppst->gdelval,#endif		  -ppst->ggapval,		  -ppst->gshift,		  f_str->max_res, a_res);  /* correct for nt_min missing residues in alignment */#else	/* TFASTX */  /*  for (i=0; i<n1; i++) {    fputc(ppst->sq[f_str->aa1x[i]],stderr);    if (i%60==59) fputc('\n',stderr);  }  fprintf(stderr,"\n-----\n");  */  last_n1 = 0;  for (itx=3*frame; itx<3+3*frame; itx++) {    n10 = saatran(aa1,&f_str->aa1x[last_n1],n1,itx);/*  for (i=0; i<n10; i++) {  fprintf(stderr,"%c",ppst->sq[aa10[last_n1+i]]);  if ((i%60)==59) fprintf(stderr,"\n");  }  fprintf(stderr,"\n");*/    last_n1 += n10+1;  }  n10 = last_n1-1;  /* create aa1y from aa1x */  for (fs=f_str->aa1x,itemp=0; itemp <3; itemp++,fs++) {    for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;    *fd=EOSEQ;  }  /*  for (i=0; i<n1; i++) {    fputc(ppst->sq[f_str->aa1y[i]],stderr);    if (i%60==59) fputc('\n',stderr);  }  fprintf(stderr,"\n-----\n");  */  n_aa = n0;  n_nt = n1;  /*  check for large differences in sequence length */  nt_min = 0; nt_max = n_nt;  if (n_nt > 6 * n_aa) {    /* find out where the diagonal is - get hoff       hoff < 0 => seq0 is in the middle of seq1     */    do_fastx(aa0, n0, f_str->aa1x, n10, ppst, f_str, &rst, &hoff);    if (rst.score[0] > 2 * rst.score[2]) {w_fact = 4;}     else w_fact = 2;    if ( hoff > n_aa) { /* hoff > 0 => seq1 is in the middle of seq0 */      nt_min = max(0,(hoff-w_fact*n_aa)*3);      nt_max = min((hoff+w_fact*n_aa)*3,n_nt);    }    else {      nt_max = min(3*w_fact*n_aa,n_nt);    }  }  score = pro_dna(aa0, n0, f_str->aa1y+nt_min, nt_max-nt_min, ppst->pam2[0],#ifdef OLD_FASTA_GAP		  -(ppst->gdelval - ppst->ggapval),#else		  -ppst->gdelval,#endif		  -ppst->ggapval,		  -ppst->gshift,		  f_str->max_res, a_res);#endif	/* TFASTX */  /* pro_dna always compares protein to DNA, and returns protein     coordinates in a_res->min0,max0 */  a_res->min1 += nt_min;  a_res->max1 += nt_min;  /* display_alig(f_str->res,f_str->aa0y,aa1,*nres,n0); */  a_res->sw_score = score;}/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev *//* call from calcons, calc_id, calc_code */void aln_func_vals(int frame, struct a_struct *aln) {#ifndef TFAST  aln->llrev = 0;  aln->llfact = 1;  aln->llmult = 1;  aln->qlfact = 3;  aln->frame = 0;  if (frame > 0) aln->qlrev = 1;  else aln->qlrev = 0;#else	/* TFASTX */  aln->qlfact = 1;  aln->qlrev = 0;  aln->llfact = 3;  aln->llmult = 1;  aln->frame = 0;  if (frame > 0) aln->llrev = 1;  else aln->llrev = 0;#endif	/* TFASTX */}/* this function is required for programs like tfastx/y/s that do   translations on DNA sequences and save them in f_str->aa1??*/voidpre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {#ifdef TFAST  int i, last_n1, itemp, n10;  unsigned char *fs, *fd;  int itx;  last_n1 = 0;  for (itx=3*frame; itx<3+3*frame; itx++) {    n10 = saatran(aa1,&f_str->aa1x[last_n1],n1,itx);/*  for (i=0; i<n10; i++) {  fprintf(stderr,"%c",ppst->sq[aa10[last_n1+i]]);  if ((i%60)==59) fprintf(stderr,"\n");  }  fprintf(stderr,"\n");*/    last_n1 += n10+1;  }  n10 = last_n1-1;  /* create aa1y from aa1x */  for (fs=f_str->aa1x,itemp=0; itemp <3; itemp++,fs++) {    for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;    *fd=EOSEQ;  }#endif}/*   Alignment: store the operation that align the protein and dna sequence.   The code of the number in the array is as follows:   0:     delete of an amino acid.   2:     frame shift, 2 nucleotides match with an amino acid   3:     match an  amino acid with a codon   4:     the other type of frame shift   5:     delete of a codon   The first two elements of the array stores the starting point    in the protein and dna sequences in the local alignment.*/#include "a_mark.h"intcalcons_a(const unsigned char *aa0, int n0,	  const unsigned char *aa1, int n1,	  int *nc,	  struct a_struct *aln,	  struct a_res_str *a_res, 	  struct pstruct *ppst,	  char *seqc0, char *seqc1, char *seqca,	  unsigned char *aa0a, char *seqc0a, char *ann_arr,	  struct f_struct *f_str){  int i0, i1, i, j;  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;  char *sp0, *sp0a, *sp1, *spa, *sq;  const unsigned char *ap0, *ap1;  int *rp, *rpmax;  int have_ann = 0;    if (ppst->ext_sq_set) {sq = ppst->sqx;}  else {sq = ppst->sq;}#ifndef TFAST	/* FASTX */  aln->amin1 = aln->smin1 = a_res->min0;	/* prot */  aln->amin0 = aln->smin0 = a_res->min1;	/* DNA */  ap0 = f_str->aa0y;	/* translated DNA */  ap1 = aa1;		/* protein */#else		/* TFASTX */  have_ann = (ann_arr[0]!='\0' && aa0a != NULL);  aln->amin0 = aln->smin0 = a_res->min0;	/* DNA */  aln->amin1 = aln->smin1 = a_res->min1;	/* prot */  ap1 = aa0;  ap0 = f_str->aa1y;#endif  rp = a_res->res;  rpmax = &a_res->res[a_res->nres];#ifndef TFAST  sp0 = seqc0;  sp1 = seqc1;#else  sp1 = seqc0;  sp0 = seqc1;#endif  spa = seqca;  sp0a = seqc0a;  lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs= 0;  i0 = a_res->min1;  i1 = a_res->min0;  while (rp < rpmax) {    /*    fprintf(stderr,"%d %d %d (%c) %d (%c)\n"	  ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);    */    switch (*rp++) {    case 0: 	/* aa insertion */      *sp0++ = '-';      *sp1++ = sq[ap1[i1++]];      *spa++ = M_DEL;      *sp0a++ = ' ';      lenc++;      ngap_d++;      break;    case 2:	/* -1 frameshift */      nfs++;      *sp0++ = '/';      i0 -= 1;      *sp1++ = '-';      *spa++ = M_DEL;      *sp0a++ = ' ';      not_c++;      if ((itmp=ppst->pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }      else if (itmp == 0) { *spa = M_ZERO;}      else {*spa = M_POS;}      if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}      if (have_ann) {*sp0a++ = ann_arr[aa0a[i1]];}      else {*sp0a++ = ' ';}      *sp0 = sq[ap0[i0]];      i0 += 3;      *sp1 = sq[ap1[i1++]];      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}      sp0++; sp1++; spa++;      lenc++;      break;    case 3:	/* codon/aa match */      if ((itmp=ppst->pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }      else if (itmp == 0) { *spa = M_ZERO;}      else {*spa = M_POS;}      if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}      if (have_ann) {*sp0a++ = ann_arr[aa0a[i1]];}      else {*sp0a++ = ' ';}      *sp0 = sq[ap0[i0]];      i0 += 3;      *sp1 = sq[ap1[i1++]];      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}      sp0++; sp1++; spa++;      lenc++;      break;    case 4:	/* +1 frameshift */      nfs++;      *sp0a++ = ' ';      *sp0++ = '\\';      i0 += 1;      *sp1++ = '-';      *spa++ = M_DEL;      not_c++;      if ((itmp=ppst->pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }      else if (itmp == 0) { *spa = M_ZERO;}      else {*spa = M_POS;}      if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}      *sp0 = sq[ap0[i0]];      i0 += 3;      *sp1 = sq[ap1[i1++]];      if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}      sp0++; sp1++; spa++;      lenc++;      break;    case 5:	/* codon insertion */      *sp0a++ = ' ';      *sp0++ = sq[ap0[i0]];      i0 += 3;      *sp1++ = '-';      *spa++ = M_DEL;      lenc++;      ngap_p++;      break;    }  }  *sp0a = *spa = '\0';#ifndef TFAST  aln->amax0 = i0;  aln->amax1 = i1;  aln->ngap_q = ngap_d;  aln->ngap_l = ngap_p;#else  aln->amax1 = i0;  aln->amax0 = i1;  aln->ngap_q = ngap_p;  aln->ngap_l = ngap_d;#endif  aln->nfs = nfs;  if (lenc < 0) lenc = 1;  *nc = lenc;/*	now we have the middle, get the right end */  return lenc+not_c;}/* build an array of match/ins/del - length strings */int calc_code(const unsigned char *aa0, int n0,	      const unsigned char *aa1, int n1,	      struct a_struct *aln,	      struct a_res_str *a_res, 	      struct pstruct *ppst,	      char *al_str, int al_str_n, 	      const unsigned char *aa0a, const char *ann_arr,	      char *ann_str, int ann_str_n,	      struct f_struct *f_str){  int i0, i1, i, j;  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;  char op_char[10];  int op, op_cnt;  char sp0, sp1, *sq;  const unsigned char *ap0, *ap1;  int *rp, *rpmax;  int have_ann = 0;  char tmp_astr[MAX_STR];  int sim_code;  char *sim_sym= aln_map_sym[MX_ACC];    if (ppst->ext_sq_set) {sq = ppst->sqx;}  else {sq = ppst->sq;}#ifndef TFAST	/* FASTX */  strncpy(op_char,"- /=\\+*",sizeof(op_char));  aln->amin1 = aln->smin1 = a_res->min0;	/* prot */  aln->amin0 = aln->smin0 = a_res->min1;	/* DNA */  ap0 = f_str->aa0y;  ap1 = aa1;#else		/* TFASTX */  have_ann = (ann_arr[0] != '\0' && aa0a != NULL);  strncpy(op_char,"+ /=\\-*",sizeof(op_char));  aln->amin0 = aln->smin0 = a_res->min0;	/* DNA */  aln->amin1 = aln->smin1 = a_res->min1;	/* prot */  ap1 = aa0;  ap0 = f_str->aa1y;#endif  rp = a_res->res;  rpmax = &a_res->res[a_res->nres];  op_cnt = lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs = 0;  op = 3; /* code for a match - all alignments start with a match */  i0 = a_res->min1;  i1 = a_res->min0;  while (rp < rpmax) {    switch (*rp++) {    case 0: 	/* aa insertion */      if (op == 0) op_cnt++;      else {	update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);	op = 0; op_cnt = 1;      }      i1++;      lenc++;      ngap_d++;      break;    case 2:	/* -1 frameshift */      if (ppst->pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}      update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);      op = 2; op_cnt = 1;      update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);      op = 3; op_cnt = 1;      nfs++;      i0 -= 1;      not_c++;      sp0 = sq[ap0[i0]];      i0 += 3;      sp1 = sq[ap1[i1++]];      if (toupper(sp0) == toupper(sp1)) aln->nident++;      lenc++;      break;    case 3:	/* codon/aa match */      sim_code = M_NEG;      if (ppst->pam2[0][ap0[i0]][ap1[i1]]>=0) {	sim_code = M_POS;	aln->nsim++;      }      sp0 = sq[ap0[i0]];      sp1 = sq[ap1[i1]];      if (toupper(sp0) == toupper(sp1)) {	aln->nident++;	sim_code = M_IDENT;      }      /* check for an annotation */      if (have_ann && ann_arr[aa0a[i0]] != ' ') {	sprintf(tmp_astr, "|%d:%d:%c%c%c%c",		aln->q_offset+i1+1,aln->l_offset+i0+1,ann_arr[aa0a[i0]],sim_sym[sim_code],sp0,sp1);	strncat(ann_str, tmp_astr, ann_str_n - strlen(ann_str) - 1)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -