⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cal_cons.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
字号:
/* cal_cons.c - routines for printing translated alignments for fasta, ssearch, ggsearch, glsearch  *//* copyright (c) 1998, 1999, 2007 by William R. Pearson and the University of Virginia *//* $Name: fa35_03_06 $ - $Id: cal_cons.c,v 1.3 2007/12/13 14:38:19 wrp Exp $ *//* removed from dropgsw2.c, dropnfa.c April, 2007 */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "defs.h"#include "param.h"#ifdef FASTA#include "dropnfa.h"#endif#if defined(SSEARCH) || defined(OSEARCH)#include "dropgsw2.h"#endif#ifdef LALIGN#include "dropgsw2.h"#endif#include "a_mark.h"static void update_code(char *al_str, int al_str_max, int op, int op_cnt);extern void aancpy(char *to, char *from, int count, struct pstruct *ppst);int calcons_a(const unsigned char *aa0, int n0,	      const unsigned char *aa1, int n1,	      int *nc,	      struct a_struct *aln,	      struct a_res_str *a_res,	      struct pstruct *ppst,	      char *seqc0, char *seqc1, char *seqca,	      unsigned char *aa0a, char *seqc0a, char *ann_arr,	      struct f_struct *f_str){  int i0, i1, nn1;  int op, lenc, nd, ns, itmp;  const unsigned char *aa1p;  char *sp0, *sp0a, *sp1, *spa, *sq;  int *rp;  int smins, mins;  int have_ann = 0;  have_ann = (ann_arr[0] != '\0' && aa0a != NULL);  if (ppst->ext_sq_set) {    sq = ppst->sqx;  }  else {    sq = ppst->sq;  }#ifndef TFASTA  aa1p = aa1;  nn1 = n1;#else  aa1p = f_str->aa1x;  nn1 = f_str->n10;#endif  aln->amin0 = a_res->min0;  aln->amax0 = a_res->max0;  aln->amin1 = a_res->min1;  aln->amax1 = a_res->max1;#ifndef LCAL_CONS  /* will we show all the start ?*/  if (min(a_res->min0,a_res->min1)<aln->llen || aln->showall==1)    if (a_res->min0>=a_res->min1) {              /* aa0 extends more to left */      smins=0;      if (aln->showall==1) mins = a_res->min0;      else mins = min(a_res->min0,aln->llcntx);      aancpy(seqc0,(char *)aa0+a_res->min0-mins,mins,ppst);      aln->smin0 = a_res->min0-mins;      if ((mins-a_res->min1)>0) {	memset(seqc1,' ',mins-a_res->min1);	aancpy(seqc1+mins-a_res->min1,(char *)aa1p,a_res->min1,ppst);	aln->smin1 = 0;      }      else {	aancpy(seqc1,(char *)aa1p+a_res->min1-mins,mins,ppst);	aln->smin1 = a_res->min1-mins;      }    }    else {      smins=0;      if (aln->showall == 1) mins=a_res->min1;      else mins = min(a_res->min1,aln->llcntx);      aancpy(seqc1,(char *)(aa1p+a_res->min1-mins),mins,ppst);      aln->smin1 = a_res->min1-mins;      if ((mins-a_res->min0)>0) {	memset(seqc0,' ',mins-a_res->min0);	aancpy(seqc0+mins-a_res->min0,(char *)aa0,a_res->min0,ppst);	aln->smin0 = 0;      }      else {	aancpy(seqc0,(char *)aa0+a_res->min0-mins,mins,ppst);	aln->smin0 = a_res->min0-mins;      }    }  else {    mins= min(aln->llcntx,min(a_res->min0,a_res->min1));    smins=mins;    aln->smin0=a_res->min0 - smins;    aln->smin1=a_res->min1 - smins;    aancpy(seqc0,(char *)aa0+a_res->min0-mins,mins,ppst);    aancpy(seqc1,(char *)aa1p+a_res->min1-mins,mins,ppst);  }  /* set the alignment code to zero for context */  memset(seqca,0,mins);  memset(seqc0a,' ',mins);#else		/* no flanking context */  smins = mins = 0;  aln->smin0=a_res->min0;  aln->smin1=a_res->min1;#endif/* now get the middle */  spa = seqca+mins;  sp0 = seqc0+mins;  sp0a = seqc0a+mins;  sp1 = seqc1+mins;  rp = a_res->res;  lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;  i0 = a_res->min0;  i1 = a_res->min1;    while (i0 < a_res->max0 || i1 < a_res->max1) {    if (op == 0 && *rp == 0) {      op = *rp++;      lenc++;      if ((itmp=ppst->pam2[0][aa0[i0]][aa1p[i1]])<0) { *spa = M_NEG; }      else if (itmp == 0) { *spa = M_ZERO;}      else {*spa = M_POS;}      if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;      if (have_ann) {	*sp0a++ = ann_arr[aa0a[i0]];      }      else {	*sp0a++ = ' ';      }      *sp0 = sq[aa0[i0++]];      *sp1 = sq[aa1p[i1++]];      if (toupper(*sp0) == toupper(*sp1)) {	aln->nident++;	*spa = M_IDENT;      }      else if (ppst->nt_align) {	if ((toupper(*sp0) == 'T' && toupper(*sp1) == 'U') ||	    (toupper(*sp0)=='U' && toupper(*sp1)=='T')) {	  aln->nident++;	  *spa = M_IDENT;	}	else if (toupper(*sp0) == 'N') aln->ngap_q++;	else if (toupper(*sp1) == 'N') aln->ngap_l++;      }      sp0++; sp1++; spa++;    }    else {      if (op==0) op = *rp++;      if (op>0) {	*sp0++ = '-';	*sp1++ = sq[aa1p[i1++]];	*spa++ = M_DEL;	*sp0a++ = ' ';	op--;	lenc++;	aln->ngap_q++;      }      else {	if (have_ann) {	  *sp0a++ = ann_arr[aa0a[i0]];	}	else {	  *sp0a++ = ' ';	}	*sp0++ = sq[aa0[i0++]];	*sp1++ = '-';	*spa++ = M_DEL;	op++;	lenc++;	aln->ngap_l++;      }    }  }  *nc = lenc;  *sp0a = *spa = '\0';#ifndef LCAL_CONS	/* have context around alignment *//*	now we have the middle, get the right end */  if (!aln->llcntx_flg) {    ns = mins + lenc + aln->llen;	/* show an extra line? */    ns -= (itmp = ns %aln->llen);	/* itmp = left over on last line */    if (itmp>aln->llen/2) ns += aln->llen;  /* more than 1/2 , use another*/    nd = ns - (mins+lenc);		/* this much extra */  }  else nd = aln->llcntx;  if (nd > max(n0-a_res->max0,nn1-a_res->max1))     nd = max(n0-a_res->max0,nn1-a_res->max1);    if (aln->showall==1) {    nd = max(n0-a_res->max0,nn1-a_res->max1);	/* reset for showall=1 */    /* get right end */    aancpy(seqc0+mins+lenc,(char *)aa0+a_res->max0,n0-a_res->max0,ppst);    aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nn1-a_res->max1,ppst);    /* fill with blanks - this is required to use one 'nc' */    memset(seqc0+mins+lenc+n0-a_res->max0,' ',nd-(n0-a_res->max0));    memset(seqc1+mins+lenc+nn1-a_res->max1,' ',nd-(nn1-a_res->max1));  }  else {    if ((nd-(n0-a_res->max0))>0) {      aancpy(seqc0+mins+lenc,(char *)aa0+a_res->max0,(n0-a_res->max0),ppst);      memset(seqc0+mins+lenc+n0-a_res->max0,' ',nd-(n0-a_res->max0));    }    else aancpy(seqc0+mins+lenc,(char *)aa0+a_res->max0,nd,ppst);    if ((nd-(nn1-a_res->max1))>0) {      aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nn1-a_res->max1,ppst);      memset(seqc1+mins+lenc+nn1-a_res->max1,' ',nd-(nn1-a_res->max1));    }    else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res->max1,nd,ppst);  }#else  nd = 0;#endif  /*  fprintf(stderr,"%d\n",mins+lenc+nd); */  lenc = mins + lenc + nd;  if (! have_ann) {    memset(seqc0a,' ',lenc);    seqc0a[lenc]='\0';  }  return lenc;}static voidupdate_code(char *al_str, int al_str_max, int op, int op_cnt) {  char op_char[5]={"=-+*"};  char tmp_cnt[20];  sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);  strncat(al_str,tmp_cnt,al_str_max);}/* build an array of match/ins/del - length strings */int calc_code(const unsigned char *aa0, int n0,	      const unsigned char *aa1, int n1,	      struct a_struct *aln,	      struct a_res_str *a_res,	      struct pstruct *ppst,	      char *al_str, int al_str_n, 	      const unsigned char *aa0a, const char *ann_arr, 	      char *ann_str, int ann_str_n,	      struct f_struct *f_str){  int i0, i1, nn1;  int op, lenc;  int p_op, op_cnt;  const unsigned char *aa1p;  char sp0, sp1, sp0a, *sq;  int *rp;  int have_ann;  char tmp_astr[MAX_STR];  int sim_code;  char *sim_sym= aln_map_sym[MX_ACC];  have_ann = (ann_str != NULL && ann_arr[0]!='\0' && aa0a != NULL);  if (ppst->ext_sq_set) {    sq = ppst->sqx;  }  else {    sq = ppst->sq;  }#ifndef TFASTA  aa1p = aa1;  nn1 = n1;#else  aa1p = f_str->aa1x;  nn1 = f_str->n10;#endif  aln->amin0 = a_res->min0;  aln->amax0 = a_res->max0;  aln->amin1 = a_res->min1;  aln->amax1 = a_res->max1;  rp = a_res->res;  lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;  op_cnt = 0;  i0 = a_res->min0;  i1 = a_res->min1;    while (i0 < a_res->max0 || i1 < a_res->max1) {    if (op == 0 && *rp == 0) {      sim_code = M_NEG;      if (ppst->pam2[0][aa0[i0]][aa1p[i1]]>=0) { 	aln->nsim++;	sim_code = M_POS;      }      sp0 = sq[aa0[i0]];      sp1 = sq[aa1p[i1]];      if (p_op == 0 || p_op==3) {	if (sp0 != '*' && sp1 != '*') {	  if (p_op == 3) {	    update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);	    op_cnt = 1; p_op = 0;	  }	  else {op_cnt++;}	}	else {	  update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);	  op_cnt = 1; p_op = 3;	}      }      else {	update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);	op_cnt = 1; p_op = 0;      }      op = *rp++;      lenc++;      if (toupper(sp0) == toupper(sp1)) {	aln->nident++;	sim_code = M_IDENT;      }      else if (ppst->nt_align) {	if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') ||	    (toupper(sp0)=='U' && toupper(sp1)=='T')) {	  aln->nident++; sim_code = M_IDENT;	}	else if (toupper(sp0) == 'N') aln->ngap_q++;	else if (toupper(sp1) == 'N') aln->ngap_l++;      }      /* check for an annotation */      if (have_ann && ann_arr[aa0a[i0]] != ' ') {	sprintf(tmp_astr, "|%d:%d:%c%c%c%c",		aln->q_offset+i0+1,aln->l_offset+i1+1,ann_arr[aa0a[i0]],sim_sym[sim_code],sp0,sp1);	strncat(ann_str, tmp_astr, ann_str_n - strlen(ann_str) - 1);	ann_str[ann_str_n-1] = '\0';      }      i0++; i1++;    }    else {      if (op==0) op = *rp++;      if (op>0) {	if (p_op == 1) { op_cnt++;}	else {	  update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);	  op_cnt = 1; p_op = 1;	}	op--; lenc++; i1++; aln->ngap_q++;      }      else {	if (p_op == 2) { op_cnt++;}	else {	  update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);	  op_cnt = 1; p_op = 2;	}	op++; lenc++; i0++; aln->ngap_l++;      }    }  }  update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);  return lenc;}int calc_id(const unsigned char *aa0, int n0,	    const unsigned char *aa1, int n1,	    struct a_struct *aln, 	    struct a_res_str *a_res,	    struct pstruct *ppst,	    struct f_struct *f_str){  int i0, i1, nn1;  int op, lenc;  int sp0, sp1;  const unsigned char *aa1p;  int *rp;  char *sq;    if (ppst->ext_sq_set) { sq = ppst->sqx; }  else { sq = ppst->sq; }#ifndef TFASTA  aa1p = aa1;  nn1 = n1;#else  aa1p = f_str->aa1x;  nn1 = f_str->n10;#endif  aln->amin0 = a_res->min0;  aln->amax0 = a_res->max0;  aln->amin1 = a_res->min1;  aln->amax1 = a_res->max1;  rp = a_res->res;  lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;  i0 = a_res->min0;  i1 = a_res->min1;  while (i0 < a_res->max0 || i1 < a_res->max1) {    if (op == 0 && *rp == 0) {      op = *rp++;      lenc++;      if (ppst->pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}      sp0 = sq[aa0[i0++]];      sp1 = sq[aa1p[i1++]];      if (toupper(sp0) == toupper(sp1)) {aln->nident++;}      else if (ppst->nt_align) {	if ((toupper(sp0)=='T' && toupper(sp1)== 'U')||	  (toupper(sp0)=='U' && toupper(sp1)=='T')) {aln->nident++;}	else if (toupper(sp0) == 'N') aln->ngap_q++;	else if (toupper(sp1) == 'N') aln->ngap_l++;      }    }    else {      if (op==0) op = *rp++;      if (op>0) {op--; lenc++; i1++; aln->ngap_q++;}      else {op++; lenc++; i0++; aln->ngap_l++; }    }  }  return lenc;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -