⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 res_stats.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 2 页
字号:
/* calculate stats from results file using scalesws.c */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <math.h>#define MAX_LLEN 200#define LN_FACT 10.0#include "defs.h"#include "structs.h"#include "param.h"struct beststr {  int score;	/* smith-waterman score */  int sscore;	/* duplicate for compatibility with fasta */  double comp;  double H;  double zscore;  double escore;  int n1;#ifndef USE_FTELLO  long lseek;	/* position in library file */#else  off_t lseek;#endif  int cont;	/* offset into sequence */  int frame;  int lib;  char libstr[13];} *bbp, *bestptr, **bptr, *best;struct stat_str {  int score;  int n1;  double comp;  double H;};static struct db_str qtt = {0l, 0l, 0};char info_gstring2[MAX_STR];                  /* string for label */char info_gstring3[MAX_STR];char info_hstring1[MAX_STR];FILE *outfd;int nbest;	/* number of sequences better than bestcut in best */int bestcut=1; 	/* cut off for getting into MAX_BEST */int bestfull;int dohist = 0;int zsflag = 1;int outtty=1;int llen=40;/* statistics functions */extern voidprocess_hist(struct stat_str *sptr, int nstat, struct pstruct pst,	     struct hist_str *hist, void **);extern void addhistz(double, struct hist_str *); /* scaleswn.c */void selectbestz(struct beststr **, int, int );extern double zs_to_E(double, int, int, long, struct db_str);extern double zs_to_Ec(double zs, long entries);extern double (*find_zp)(int score, int length, double comp, void *);void prhist(FILE *, struct mngmsg, struct pstruct, struct hist_str, 	    int, struct db_str, char *);int nshow=20, mshow=50, ashow= -1;double e_cut=10.0;main(argc, argv)     int argc; char **argv;{  FILE *fin;  char line[512];  int max, icol, iarg, i, qsfnum, lsfnum, n0, n1, s[3], frame;  double comp, H;  int idup, ndup, max_s;  char libstr[20], *bp;  char bin_file[80];  FILE *bout=NULL;  struct mngmsg m_msg;		/* Message from host to manager */  struct pstruct pst;  struct stat_str *stats;  int nstats;  double zscor, mu, var;#if defined(UNIX)  outtty = isatty(1);#else  outtty = 1;#endif  if (argc < 2 ) {    fprintf(stderr," useage - res_stats -c col -r bin_file file\n");    exit(1);  }  m_msg.db.length = qtt.length = 0l;  m_msg.db.entries = m_msg.db.carry = qtt.entries = qtt.carry = 0;  m_msg.pstat_void = NULL;  m_msg.hist.hist_a = NULL;  m_msg.nohist = 0;  m_msg.markx = 0;  pst.n0 = 200;		/* sensible dummy value */  pst.zsflag = 1;  pst.dnaseq = 0;  pst.histint = 2;  bin_file[0]='\0';  icol = 1;  iarg = 1;  ndup = 1;  while (1) {    if (argv[iarg][0]=='-' && argv[iarg][1]=='c') {      sscanf(argv[iarg+1],"%d",&icol);      iarg += 2;    }    else if (argv[iarg][0]=='-' && argv[iarg][1]=='r') {      strncpy(bin_file,argv[iarg+1],sizeof(bin_file));      iarg += 2;    }    else if (argv[iarg][0]=='-' && argv[iarg][1]=='z') {      sscanf(argv[iarg+1],"%d",&pst.zsflag);      iarg += 2;    }    else if (argv[iarg][0]=='-' && argv[iarg][1]=='n') {      pst.dnaseq = 1;      iarg += 1;    }    else if (argv[iarg][0]=='-' && argv[iarg][1]=='s') {      sscanf(argv[iarg+1],"%d",&ndup);      iarg += 2;    }    else if (argv[iarg][0]=='-' && argv[iarg][1]=='q') {      outtty = 0;      iarg += 1;    }    else break;  }  icol--;  if ((fin=fopen(argv[iarg],"r"))==NULL) {    fprintf(stderr," cannot open %s\n",argv[1]);    exit(1);  }  if (bin_file[0]!='\0' && ((bout=fopen(bin_file,"w"))==NULL)) {    fprintf(stderr,"cannot open %s for output\n",bin_file);  }  if ((stats =       (struct stat_str *)malloc((MAX_STATS)*sizeof(struct stat_str)))==NULL)    s_abort ("Cannot allocate stats struct","");  nstats = 0;  initbest(MAX_BEST+1);	/* +1 required for select() */  for (nbest=0; nbest<MAX_BEST+1; nbest++)    bptr[nbest] = &best[nbest];  bptr++; best++;  best[-1].score= BIGNUM;    nbest = 0;  pst.Lambda=0.232;  pst.K = 0.11;  pst.H = 0.34;  /* read the best scores from the results file */  max_s = -1;  idup = 0;  /* get first line with sequence length */  fgets(line,sizeof(line),fin);  sscanf(line,"%d",&n0);  if (n0 > 0) pst.n0 = n0;  while (fgets(line,sizeof(line),fin)!=NULL) {    if (line[0]=='/' && line[1]=='*') {      fputs(line,stdout);      strncpy(info_gstring2,line,sizeof(info_gstring2));      if ((bp=strchr(info_gstring2,'\n'))!=NULL) *bp = '\0';      break;    }    if (line[0]==';') {      if ((bp=strchr(line,'|'))!=NULL) qsfnum = atoi(bp+1);      else continue;      if ((bp=strchr(line,'('))!=NULL) {	n0 = atoi(bp+1);	pst.n0 = n0;      }      else {	fprintf(stderr, "cannot find n0:\n %s\n",line);	continue;      }    }    else {	sscanf(line,"%s %d %d %d %lf %lf %d %d %d",	       libstr,&lsfnum,&n1,&frame,&comp, &H, &s[0],&s[1],&s[2]);	if (lsfnum==0 && n1==0) {	  fputs(line,stderr);	  continue;	}	if (n1 < 10 || s[icol]<=0) fputs(line,stderr);	idup++;	if (s[icol] > max_s) max_s = s[icol];	if (idup < ndup) continue;	m_msg.db.entries++;	m_msg.db.length += n1;	if (dohist) addhistz(zscor=(*find_zp)(max_s,n1,comp,m_msg.pstat_void),			     &m_msg.hist);	else zscor = (double)max_s;	if (nstats < MAX_STATS) {	  stats[nstats].n1 = n1;	  stats[nstats].comp = comp;	  stats[nstats].H = H;	  stats[nstats++].score = max_s;	}	else if (!dohist) {	  /*	  do_bout(bout,stats,nstats); */	  process_hist(stats,nstats,pst,&m_msg.hist, &m_msg.pstat_void);	  for (i=0; i<nbest; i++)	    bptr[i]->zscore = 	      (*find_zp)(bptr[i]->score,bptr[i]->n1,bptr[i]->comp,			 m_msg.pstat_void);	  dohist = 1;	}	if (dohist) {	  zscor =(*find_zp)(max_s,n1,comp,m_msg.pstat_void);	  addhistz(zscor,&m_msg.hist);	}	else zscor = (double)max_s;	if (nbest >= MAX_BEST) {	  bestfull = nbest-MAX_BEST/4;	  selectz(bestfull-1,nbest);	  bestcut = (int)(bptr[bestfull-1]->zscore+0.5);	  nbest = bestfull;	}	bestptr = bptr[nbest];	bestptr->score = max_s;	bestptr->sscore = max_s;	bestptr->n1 = n1;	bestptr->comp = comp;	bestptr->H = H;	bestptr->lib = lsfnum;	bestptr->zscore = zscor;	strncpy(bestptr->libstr,libstr,12);	bestptr->libstr[12]='\0';	nbest++;	max_s = -1;	idup = 0;    }  }	/* done with reading results */  if (!dohist) {    if (nbest < 20) {      zsflag = 0;    }    else {      /*      do_bout(bout,stats,nstats); */      process_hist(stats,nstats,pst,&m_msg.hist,&m_msg.pstat_void);      for (i=0; i<nbest; i++)	bptr[i]->zscore = 	  (*find_zp)(bptr[i]->score,bptr[i]->n1,bptr[i]->comp,m_msg.pstat_void);      dohist = 1;    }  }    printf(" using n0: %d\n",pst.n0);  /* print histogram, statistics */  m_msg.nbr_seq = m_msg.db.entries;  pst.zdb_size = m_msg.db.entries;  /* get_param(&pst, info_gstring2,info_gstring3); */  prhist(stdout,m_msg,pst,m_msg.hist,nstats,m_msg.db,info_gstring2);  if (!zsflag) sortbest();  else {    sortbestz(bptr,nbest);    for (i=0; i<nbest; i++)      bptr[i]->escore = zs_to_E(bptr[i]->zscore,bptr[i]->n1,pst.dnaseq,				pst.zdb_size, m_msg.db);  }    outfd = stdout;  showbest(m_msg.db);	/* display best matches */}initbest(nbest)		/* allocate arrays for best sort */     int nbest;{  if ((best=(struct beststr *)calloc((size_t)nbest,sizeof(struct beststr)))      == NULL) {fprintf(stderr,"cannot allocate best struct\n"); exit(1);}  if ((bptr=(struct beststr **)calloc((size_t)nbest,sizeof(struct beststr *)))      == NULL) {fprintf(stderr,"cannot allocate bptr\n"); exit(1);}}voidprhist(FILE *fd, struct mngmsg m_msg,       struct pstruct pst,        struct hist_str hist,        int nstats,       struct db_str ntt,       char *info_gstring2){  int i,j,hl,hll, el, ell, ev;  char hline[80], pch, *bp;  int mh1, mht;  int maxval, maxvalt, dotsiz, ddotsiz,doinset;  double cur_e, prev_e, f_int;  double max_dev, x_tmp;  double db_tt;  int n_chi_sq, cum_hl, max_i;  fprintf(fd,"\n");    if (pst.zsflag < 0 || nstats <= 10) {    fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length,ntt.entries);    fprintf(fd,"\n%s\n",info_gstring2);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -