⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 compacc.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 2 页
字号:
/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the   U. of Virginia *//* $Name: fa35_03_06 $ - $Id: compacc.c,v 1.70 2008/01/11 16:19:01 wrp Exp $ *//* Concurrent read version */#include <stdio.h>#include <stdlib.h>#if defined(UNIX) || defined(WIN32)#include <sys/types.h>#endif#include <limits.h>#include <float.h>#include <string.h>#include <time.h>#include <math.h>#include "defs.h"#include "param.h"#include "structs.h"#ifndef PCOMPLIB#include "mw.h"#else#include "p_mw.h"#endif#define XTERNAL#include "uascii.h"#include "upam.h"#undef XTERNAL#ifdef PCOMPLIB#include "msg.h"extern int nnodes;#ifdef PVM_SRC#include "pvm3.h"extern int pinums[],hosttid;#endif#ifdef MPI_SRC#include "mpi.h"#endif#endifextern time_t tdone, tstart;		/* Timing */extern void abort ();extern void ptime ();/* because it is used to pre-allocate space, maxn has various   constraints.  For "simple" comparisons, it is simply the length of   the longest library sequence.  But for translated comparisons, it   must be 3 or 6X the length of the query sequence.    In addition, however, it can be reduced to make certain that   sequences are read in smaller chunks.  And, maxn affect how large   overlaps must be when sequences are read in chunks.*/intreset_maxn(struct mngmsg *m_msg, int maxn) {  /* reduce maxn if requested */  if (m_msg->maxn > 0 && m_msg->maxn < maxn) maxn = m_msg->maxn;  if (m_msg->qdnaseq==m_msg->ldnaseq || m_msg->qdnaseq==SEQT_DNA ||      m_msg->qdnaseq == SEQT_RNA) {/* !TFAST - either FASTA or FASTX*/   if (m_msg->n0> m_msg->max_tot/3) {      fprintf(stderr," query sequence is too long %d > %d %s\n",	      m_msg->n0,	      m_msg->max_tot/3,	      m_msg->sqnam);      exit(1);    }    m_msg->l_overlap = m_msg->n0;    m_msg->maxt3 = maxn-m_msg->l_overlap;  }  else {	/* is TFAST */    if (m_msg->n0 > MAXTST) {      fprintf(stderr," query sequence is too long %d %s\n",m_msg->n0,m_msg->sqnam);      exit(1);    }    if (m_msg->n0*3 > maxn ) {	/* n0*3 for the three frames - this				   will only happen if maxn has been				   set low manually */      if (m_msg->n0*4+2 < m_msg->max_tot) { /* m_msg0*3 + m_msg0 */	fprintf(stderr,		" query sequence too long for library segment: %d - resetting to %d\n",	      maxn,m_msg->n0*3);	maxn = m_msg->maxn = m_msg->n0*3;      }      else {	fprintf(stderr," query sequence too long for translated search: %d * 4 > %d %s\n",	      m_msg->n0,maxn, m_msg->sqnam);	exit(1);      }    }    /* set up some constants for overlaps */    m_msg->l_overlap = 3*m_msg->n0;    m_msg->maxt3 = maxn-m_msg->l_overlap-3;    m_msg->maxt3 -= m_msg->maxt3%3;    m_msg->maxt3++;    maxn = maxn - 3; maxn -= maxn%3; maxn++;  }  return maxn;}intscanseq(unsigned char *seq, int n, char *str) {  int tot,i;  char aaray[128];		/* this must be set > nsq */	  for (i=0; i<128; i++)  aaray[i]=0;  for (i=0; (size_t)i < strlen(str); i++) aaray[qascii[str[i]]]=1;  for (i=tot=0; i<n; i++) tot += aaray[seq[i]];  return tot;}/* subs_env takes a string, possibly with ${ENV}, and looks up all the   potential environment variables and substitutes them into the   string */void subs_env(char *dest, char *src, int dest_size) {  char *last_src, *bp, *bp1;  last_src = src;  if ((bp = strchr(src,'$'))==NULL) {    strncpy(dest, src, dest_size);    dest[dest_size-1] = '\0';  }  else {    *dest = '\0';    while (strlen(dest) < dest_size-1 && bp != NULL ) {      /* copy stuff before ${*/      *bp = '\0';      strncpy(dest, last_src, dest_size);      *bp = '$';      /* copy ENV */      if (*(bp+1) != '{') {	strncat(dest, "$", dest_size - strlen(dest) -1);	dest[dest_size-1] = '\0';	bp += 1;      }      else {	/* have  ${ENV} - put it in */	if ((bp1 = strchr(bp+2,'}'))==NULL) {	  fprintf(stderr, "Unterminated ENV: %s\n",src);	  break;	}	else {	  *bp1 = '\0';	  if (getenv(bp+2)!=NULL) {	    strncat(dest, getenv(bp+2), dest_size - strlen(dest) - 1);	    dest[dest_size-1] = '\0';	    *bp1 = '}';	  }	  bp = bp1+1;	/* bump bp even if getenv == NULL */	}      }      last_src = bp;      /* now get the next ${ENV} if present */      bp = strchr(last_src,'$');    }    /* now copy the last stuff */    strncat(dest, last_src, dest_size - strlen(dest) - 1);    dest[dest_size-1]='\0';  }}void selectbest(bptr,k,n)	/* k is rank in array */     struct beststr **bptr;     int k,n;{  int v, i, j, l, r;  struct beststr *tmptr;  l=0; r=n-1;  while ( r > l ) {    v = bptr[r]->rst.score[0];    i = l-1;    j = r;    do {      while (bptr[++i]->rst.score[0] > v) ;      while (bptr[--j]->rst.score[0] < v) ;      tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;    } while (j > i);    bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;    if (i>=k) r = i-1;    if (i<=k) l = i+1;  }}void selectbestz(bptr,k,n)	/* k is rank in array */     struct beststr **bptr;     int k,n;{  int i, j, l, r;  struct beststr *tmptr;  double v;  l=0; r=n-1;  while ( r > l ) {    v = bptr[r]->zscore;    i = l-1;    j = r;    do {      while (bptr[++i]->zscore > v) ;      while (bptr[--j]->zscore < v) ;      tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;    } while (j > i);    bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;    if (i>=k) r = i-1;    if (i<=k) l = i+1;  }}/* improved shellsort with high-performance increments *//*shellsort(itemType a[], int l, int r){ int i, j, k, h; itemType v; int incs[16] = { 1391376, 463792, 198768, 86961, 33936,		  13776, 4592, 1968, 861, 336, 		  112, 48, 21, 7, 3, 1 }; for ( k = 0; k < 16; k++)   for (h = incs[k], i = l+h; i <= r; i++)     {        v = a[i]; j = i;       while (j > h && a[j-h] > v)	 { a[j] = a[j-h]; j -= h; }       a[j] = v;      } }*//* ?improved? version of sortbestz using optimal increments and fewer   exchanges */void sortbestz(struct beststr **bptr, int nbest){  int gap, i, j, k;  struct beststr *tmp;  double v;  int incs[16] = { 1391376, 463792, 198768, 86961, 33936,		   13776, 4592, 1968, 861, 336, 		   112, 48, 21, 7, 3, 1 };  for ( k = 0; k < 16; k++) {    gap = incs[k];    for (i=gap; i < nbest; i++) {      tmp = bptr[i];      j = i;      v = bptr[i]->zscore;      while ( j >= gap && bptr[j-gap]->zscore < v) {	bptr[j] = bptr[j - gap];	j -= gap;      }      bptr[j] = tmp;    }  }}void sortbeste(struct beststr **bptr, int nbest){  int gap, i, j, k;  struct beststr *tmp;  double v;  int incs[16] = { 1391376, 463792, 198768, 86961, 33936,		   13776, 4592, 1968, 861, 336, 		   112, 48, 21, 7, 3, 1 };  for ( k = 0; k < 16; k++) {    gap = incs[k];     for (i=gap; i < nbest; i++) {      j = i;      tmp = bptr[i];      v = tmp->rst.escore;      while ( j >= gap && bptr[j-gap]->rst.escore > v) {	bptr[j] = bptr[j - gap];	j -= gap;      }      bptr[j] = tmp;    }  }  /* sometimes there are many high scores with E()==0.0, sort     those by z() score */  j = 0;  while (j < nbest && bptr[j]->rst.escore <= 2.0*DBL_MIN ) {j++;}  if (j > 1) sortbestz(bptr,j);}extern double zs_to_Ec(double zs, long entries);/*extern double ks_dev;extern int ks_df; */voidprhist(FILE *fd, struct mngmsg m_msg,       struct pstruct *ppst,        struct hist_str hist,        int nstats,       struct db_str ntt,       char *lib_range,       char **info_gstring2,       char **info_hstring){  int i,j,hl,hll, el, ell, ev;  char hline[80], pch, *bp;  int mh1, mht;  int maxval, maxvalt, dotsiz, ddotsiz,doinset;  double cur_e, prev_e, f_int;  double max_dev, x_tmp;  double db_tt;  int n_chi_sq, cum_hl=0, max_i=0;  fprintf(fd,"\n");    if (ppst->zsflag_f < 0) {    fprintf(fd, "%7ld residues in %5ld sequences", ntt.length,ntt.entries);    fprintf(fd, "%s\n",lib_range);    fprintf(fd,"Algorithm: %s\nParameters: %s\n",info_gstring2[0],info_gstring2[1]);    return;  }  if (nstats > 20) {     max_dev = 0.0;    mh1 = hist.maxh-1;    mht = (3*hist.maxh-3)/4 - 1;    if (!m_msg.nohist && mh1 > 0) {      for (i=0,maxval=0,maxvalt=0; i<hist.maxh; i++) {	if (hist.hist_a[i] > maxval) maxval = hist.hist_a[i];	if (i >= mht &&  hist.hist_a[i]>maxvalt) maxvalt = hist.hist_a[i];      }      n_chi_sq = 0;      cum_hl = -hist.hist_a[0];      dotsiz = (maxval-1)/60+1;      ddotsiz = (maxvalt-1)/50+1;      doinset = (ddotsiz < dotsiz && dotsiz > 2);      if (ppst->zsflag_f>=0)	fprintf(fd,"       opt      E()\n");      else 	fprintf(fd,"     opt\n");      prev_e =  zs_to_Ec((double)(hist.min_hist-hist.histint/2),hist.entries);      for (i=0; i<=mh1; i++) {	pch = (i==mh1) ? '>' : ' ';	pch = (i==0) ? '<' : pch;	hll = hl = hist.hist_a[i];	if (ppst->zsflag_f>=0) {	  cum_hl += hl;	  f_int = (double)(i*hist.histint+hist.min_hist)+(double)hist.histint/2.0;	  cur_e = zs_to_Ec(f_int,hist.entries);	  ev = el = ell = (int)(cur_e - prev_e + 0.5);	  if (hl > 0  && i > 5 && i < (90-hist.min_hist)/hist.histint) {	    x_tmp  = fabs(cum_hl - cur_e);	    if ( x_tmp > max_dev) {	      max_dev = x_tmp;	      max_i = i;	    }	    n_chi_sq++;	  }	  if ((el=(el+dotsiz-1)/dotsiz) > 60) el = 60;	  if ((ell=(ell+ddotsiz-1)/ddotsiz) > 40) ell = 40;	  fprintf(fd,"%c%3d %5d %5d:",		  pch,(i<mh1)?(i)*hist.histint+hist.min_hist :		  mh1*hist.histint+hist.min_hist,hl,ev);	}	else fprintf(fd,"%c%3d %5d :",		     pch,(i<mh1)?(i)*hist.histint+hist.min_hist :		     mh1*hist.histint+hist.min_hist,hl);	if ((hl=(hl+dotsiz-1)/dotsiz) > 60) hl = 60;	if ((hll=(hll+ddotsiz-1)/ddotsiz) > 40) hll = 40;	for (j=0; j<hl; j++) hline[j]='='; 	if (ppst->zsflag_f>=0) {	  if (el <= hl ) {	    if (el > 0) hline[el-1]='*';	    hline[hl]='\0';	  }	  else {	    for (j = hl; j < el; j++) hline[j]=' ';	    hline[el-1]='*';	    hline[hl=el]='\0';	  }	}	else hline[hl] = 0;	if (i==1) {	  for (j=hl; j<10; j++) hline[j]=' ';	  sprintf(&hline[10]," one = represents %d library sequences",dotsiz);	}	if (doinset && i == mht-2) {	  for (j = hl; j < 10; j++) hline[j]=' ';	  sprintf(&hline[10]," inset = represents %d library sequences",ddotsiz);	}	if (i >= mht&& doinset ) {	  for (j = hl; j < 10; j++) hline[j]=' ';	  hline[10]=':';	  for (j = 11; j<11+hll; j++) hline[j]='=';	  hline[11+hll]='\0';	  if (ppst->zsflag_f>=0) {	    if (ell <= hll) hline[10+ell]='*';	    else {	      for (j = 11+hll; j < 10+ell; j++) hline[j]=' ';	      hline[10+ell] = '*';	      hline[11+ell] = '\0';	    }	  }	}	fprintf(fd,"%s\n",hline);	prev_e = cur_e;      }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -