📄 res_stats.c
字号:
/* calculate stats from results file using scalesws.c */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <math.h>#define MAX_LLEN 200#define LN_FACT 10.0#include "defs.h"#include "structs.h"#include "param.h"struct beststr { int score; /* smith-waterman score */ int sscore; /* duplicate for compatibility with fasta */ double comp; double H; double zscore; double escore; int n1;#ifndef USE_FTELLO long lseek; /* position in library file */#else off_t lseek;#endif int cont; /* offset into sequence */ int frame; int lib; char libstr[13];} *bbp, *bestptr, **bptr, *best;struct stat_str { int score; int n1; double comp; double H;};static struct db_str qtt = {0l, 0l, 0};char info_gstring2[MAX_STR]; /* string for label */char info_gstring3[MAX_STR];char info_hstring1[MAX_STR];FILE *outfd;int nbest; /* number of sequences better than bestcut in best */int bestcut=1; /* cut off for getting into MAX_BEST */int bestfull;int dohist = 0;int zsflag = 1;int outtty=1;int llen=40;/* statistics functions */extern voidprocess_hist(struct stat_str *sptr, int nstat, struct pstruct pst, struct hist_str *hist, void **);extern void addhistz(double, struct hist_str *); /* scaleswn.c */void selectbestz(struct beststr **, int, int );extern double zs_to_E(double, int, int, long, struct db_str);extern double zs_to_Ec(double zs, long entries);extern double (*find_zp)(int score, int length, double comp, void *);void prhist(FILE *, struct mngmsg, struct pstruct, struct hist_str, int, struct db_str, char *);int nshow=20, mshow=50, ashow= -1;double e_cut=10.0;main(argc, argv) int argc; char **argv;{ FILE *fin; char line[512]; int max, icol, iarg, i, qsfnum, lsfnum, n0, n1, s[3], frame; double comp, H; int idup, ndup, max_s; char libstr[20], *bp; char bin_file[80]; FILE *bout=NULL; struct mngmsg m_msg; /* Message from host to manager */ struct pstruct pst; struct stat_str *stats; int nstats; double zscor, mu, var;#if defined(UNIX) outtty = isatty(1);#else outtty = 1;#endif if (argc < 2 ) { fprintf(stderr," useage - res_stats -c col -r bin_file file\n"); exit(1); } m_msg.db.length = qtt.length = 0l; m_msg.db.entries = m_msg.db.carry = qtt.entries = qtt.carry = 0; m_msg.pstat_void = NULL; m_msg.hist.hist_a = NULL; m_msg.nohist = 0; m_msg.markx = 0; pst.n0 = 200; /* sensible dummy value */ pst.zsflag = 1; pst.dnaseq = 0; pst.histint = 2; bin_file[0]='\0'; icol = 1; iarg = 1; ndup = 1; while (1) { if (argv[iarg][0]=='-' && argv[iarg][1]=='c') { sscanf(argv[iarg+1],"%d",&icol); iarg += 2; } else if (argv[iarg][0]=='-' && argv[iarg][1]=='r') { strncpy(bin_file,argv[iarg+1],sizeof(bin_file)); iarg += 2; } else if (argv[iarg][0]=='-' && argv[iarg][1]=='z') { sscanf(argv[iarg+1],"%d",&pst.zsflag); iarg += 2; } else if (argv[iarg][0]=='-' && argv[iarg][1]=='n') { pst.dnaseq = 1; iarg += 1; } else if (argv[iarg][0]=='-' && argv[iarg][1]=='s') { sscanf(argv[iarg+1],"%d",&ndup); iarg += 2; } else if (argv[iarg][0]=='-' && argv[iarg][1]=='q') { outtty = 0; iarg += 1; } else break; } icol--; if ((fin=fopen(argv[iarg],"r"))==NULL) { fprintf(stderr," cannot open %s\n",argv[1]); exit(1); } if (bin_file[0]!='\0' && ((bout=fopen(bin_file,"w"))==NULL)) { fprintf(stderr,"cannot open %s for output\n",bin_file); } if ((stats = (struct stat_str *)malloc((MAX_STATS)*sizeof(struct stat_str)))==NULL) s_abort ("Cannot allocate stats struct",""); nstats = 0; initbest(MAX_BEST+1); /* +1 required for select() */ for (nbest=0; nbest<MAX_BEST+1; nbest++) bptr[nbest] = &best[nbest]; bptr++; best++; best[-1].score= BIGNUM; nbest = 0; pst.Lambda=0.232; pst.K = 0.11; pst.H = 0.34; /* read the best scores from the results file */ max_s = -1; idup = 0; /* get first line with sequence length */ fgets(line,sizeof(line),fin); sscanf(line,"%d",&n0); if (n0 > 0) pst.n0 = n0; while (fgets(line,sizeof(line),fin)!=NULL) { if (line[0]=='/' && line[1]=='*') { fputs(line,stdout); strncpy(info_gstring2,line,sizeof(info_gstring2)); if ((bp=strchr(info_gstring2,'\n'))!=NULL) *bp = '\0'; break; } if (line[0]==';') { if ((bp=strchr(line,'|'))!=NULL) qsfnum = atoi(bp+1); else continue; if ((bp=strchr(line,'('))!=NULL) { n0 = atoi(bp+1); pst.n0 = n0; } else { fprintf(stderr, "cannot find n0:\n %s\n",line); continue; } } else { sscanf(line,"%s %d %d %d %lf %lf %d %d %d", libstr,&lsfnum,&n1,&frame,&comp, &H, &s[0],&s[1],&s[2]); if (lsfnum==0 && n1==0) { fputs(line,stderr); continue; } if (n1 < 10 || s[icol]<=0) fputs(line,stderr); idup++; if (s[icol] > max_s) max_s = s[icol]; if (idup < ndup) continue; m_msg.db.entries++; m_msg.db.length += n1; if (dohist) addhistz(zscor=(*find_zp)(max_s,n1,comp,m_msg.pstat_void), &m_msg.hist); else zscor = (double)max_s; if (nstats < MAX_STATS) { stats[nstats].n1 = n1; stats[nstats].comp = comp; stats[nstats].H = H; stats[nstats++].score = max_s; } else if (!dohist) { /* do_bout(bout,stats,nstats); */ process_hist(stats,nstats,pst,&m_msg.hist, &m_msg.pstat_void); for (i=0; i<nbest; i++) bptr[i]->zscore = (*find_zp)(bptr[i]->score,bptr[i]->n1,bptr[i]->comp, m_msg.pstat_void); dohist = 1; } if (dohist) { zscor =(*find_zp)(max_s,n1,comp,m_msg.pstat_void); addhistz(zscor,&m_msg.hist); } else zscor = (double)max_s; if (nbest >= MAX_BEST) { bestfull = nbest-MAX_BEST/4; selectz(bestfull-1,nbest); bestcut = (int)(bptr[bestfull-1]->zscore+0.5); nbest = bestfull; } bestptr = bptr[nbest]; bestptr->score = max_s; bestptr->sscore = max_s; bestptr->n1 = n1; bestptr->comp = comp; bestptr->H = H; bestptr->lib = lsfnum; bestptr->zscore = zscor; strncpy(bestptr->libstr,libstr,12); bestptr->libstr[12]='\0'; nbest++; max_s = -1; idup = 0; } } /* done with reading results */ if (!dohist) { if (nbest < 20) { zsflag = 0; } else { /* do_bout(bout,stats,nstats); */ process_hist(stats,nstats,pst,&m_msg.hist,&m_msg.pstat_void); for (i=0; i<nbest; i++) bptr[i]->zscore = (*find_zp)(bptr[i]->score,bptr[i]->n1,bptr[i]->comp,m_msg.pstat_void); dohist = 1; } } printf(" using n0: %d\n",pst.n0); /* print histogram, statistics */ m_msg.nbr_seq = m_msg.db.entries; pst.zdb_size = m_msg.db.entries; /* get_param(&pst, info_gstring2,info_gstring3); */ prhist(stdout,m_msg,pst,m_msg.hist,nstats,m_msg.db,info_gstring2); if (!zsflag) sortbest(); else { sortbestz(bptr,nbest); for (i=0; i<nbest; i++) bptr[i]->escore = zs_to_E(bptr[i]->zscore,bptr[i]->n1,pst.dnaseq, pst.zdb_size, m_msg.db); } outfd = stdout; showbest(m_msg.db); /* display best matches */}initbest(nbest) /* allocate arrays for best sort */ int nbest;{ if ((best=(struct beststr *)calloc((size_t)nbest,sizeof(struct beststr))) == NULL) {fprintf(stderr,"cannot allocate best struct\n"); exit(1);} if ((bptr=(struct beststr **)calloc((size_t)nbest,sizeof(struct beststr *))) == NULL) {fprintf(stderr,"cannot allocate bptr\n"); exit(1);}}voidprhist(FILE *fd, struct mngmsg m_msg, struct pstruct pst, struct hist_str hist, int nstats, struct db_str ntt, char *info_gstring2){ int i,j,hl,hll, el, ell, ev; char hline[80], pch, *bp; int mh1, mht; int maxval, maxvalt, dotsiz, ddotsiz,doinset; double cur_e, prev_e, f_int; double max_dev, x_tmp; double db_tt; int n_chi_sq, cum_hl, max_i; fprintf(fd,"\n"); if (pst.zsflag < 0 || nstats <= 10) { fprintf(fd, "%7ld residues in %5ld sequences\n", ntt.length,ntt.entries); fprintf(fd,"\n%s\n",info_gstring2);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -