⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 comp_lib2.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 5 页
字号:
    /* rline[] is a tmp string */    if (m_msg.qdnaseq == SEQT_DNA || m_msg.qdnaseq == SEQT_RNA) {      strncpy(rline,(m_msg.qframe==1)? " (forward-only)" : "\0",sizeof(rline));      rline[sizeof(rline)-1]='\0';    }    else rline[0]='\0';    leng = (int)strlen(m_msg.qtitle);    if (leng > 50) leng -= 10;    sprintf (&m_msg.qtitle[leng], " %d %s", m_msg.n0, m_msg.sqnam);    m_msg.seqnm = 0;    if (m_msg.std_output) {#ifdef COMP_MLIB      printf("%3d>>>%s - %d %s%s\n", qlib,	     m_msg.qtitle, m_msg.n0, m_msg.sqnam,	     (m_msg.revcomp ? " (reverse complement)" : rline));#else      printf("%.50s: %d %s%s\n %s\n",	     info_qlabel, m_msg.n0, m_msg.sqnam,	     (m_msg.revcomp ? " (reverse complement)" : rline));#endif      /* check for annotation */      if (m_msg.q_ann_flg) {	printf("Annotation: ");	for (j=0; j<m_msg.n0; j++) {	  if (m_msg.aa0a[j] && m_msg.ann_arr[m_msg.aa0a[j]] != ' ' ) {	    printf("|%d:%c%c",		   j+1,m_msg.ann_arr[m_msg.aa0a[j]],pst.sq[aa0[0][j]]);	  }	}	printf("\n");      }      printf("Library: %.60s", m_msg.ltitle);      fflush(stdout);    }    n_libstr=MAX_UID;    fflush(outfd);    tprev = s_time();      if (fdata) fprintf(fdata,">>>%ld %3d\t%-50s\n",qtt.entries,m_msg.n0,m_msg.qtitle);    qtt.length += m_msg.n0;    qtt.entries++;#ifndef COMP_THR    /* initialize the comparison function, returning f_str */    init_work (aa0[0], m_msg.n0, &pst, &f_str[0]);    have_f_str=1;    f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] =  f_str[0];    /* label library size limits */    if (pst.n1_low > 0 && pst.n1_high < BIGNUM)       sprintf(info_lib_range," (range: %d-%d)",pst.n1_low,pst.n1_high);    else if (pst.n1_low > 0)       sprintf(info_lib_range," (range: >%d)",pst.n1_low);    else if (pst.n1_high < BIGNUM)      sprintf(info_lib_range," (range: <%d)",pst.n1_high);    else      info_lib_range[0]='\0';    info_lib_range[sizeof(info_lib_range)-1]='\0';    info_lib_range_p = info_lib_range;    if (m_msg.qframe == 2) {      init_work ( aa0[1], m_msg.n0, &pst, &f_str[1]);    }    if (m_msg.qshuffle) {      init_work ( aa0s, m_msg.n0, &pst, &qf_str);    }    lib_bhead_p = lib_buf2_list;	/* equivalent to un-threaded get_rbuf() */#else	/* COMP_THR */    start_thr();    /* now open the library and start reading */    /* get a buffer and fill it up */    get_rbuf(&lib_bhead_p,max_work_buf);    empty_reader_bufs++;#endif    lib_bhead_p->buf2_cnt = 0;    lib_bhead_p->have_results = 0;    lib_bhead_p->buf2_type=BUF2_DOWORK;    if (pst.zsflag > 10) {      lib_bhead_p->buf2_type |= BUF2_DOSHUF;    }    lib_buf2_p = lib_bhead_p->buf2;    ntbuff = 0;    /* open the library - start the search */    for (iln = 0; iln < m_msg.nln; iln++) {      if ((m_msg.lb_mfd[iln] = m_file_p=	   openlib(m_msg.lbnames[iln], m_msg.ldnaseq, lascii, !m_msg.quiet, m_msg.lb_mfd[iln]))	  ==NULL) {	fprintf(stderr," cannot open library %s\n",m_msg.lbnames[iln]);	continue;      }      loffset = 0l;      lcont = 0;      ocont = 0;      n1tot_v = n1tot_cnt = 0;      n1tot_cur = n1tot_ptr = NULL;      /* get next buffer to read into */      maxt = maxn;      /* read sequence directly into buffer */      current_seq_p = lib_buf2_p->seq;      aa1ptr = aa1 = current_seq_p->aa1b;      while ((n1=GETLIB(aa1ptr, maxt, libstr, n_libstr,			&lseek,	&lcont, m_file_p,			&(current_seq_p->l_off)))>=0) {	/* check for subset on library*/	if (m_file_p->opt_text[0]!='\0') {	  if (m_file_p->opt_text[0]=='-') {	    lstart=0; sscanf(&m_file_p->opt_text[1],"%d",&lstop);	  }	  else {	    sscanf(&m_file_p->opt_text[0],"%d-%d",&lstart,&lstop);	    lstart--;	    if (lstop <= 0 ) lstop = BIGNUM;	  }	  for (id=0,is=lstart; is<min(n1,lstop); ) aa1[id++]=aa1[is++];	  aa1[id]=0;	  n1 = min(n1,lstop)-lstart;	  current_seq_p->l_off += lstart;	}	current_seq_p->n1 = n1;	current_seq_p->m_file_p = (void *)m_file_p;	current_seq_p->cont = ocont+1;	current_seq_p->l_offset = loffset;	current_seq_p->lseek = lseek;#ifdef SUPERFAMNUM	current_seq_p->nsfnum = nsfnum;	if ((current_seq_p->sfnum[0]=sfnum[0])>0 &&	    (current_seq_p->sfnum[1]=sfnum[1])>0 &&	    (current_seq_p->sfnum[2]=sfnum[2])>0 &&	    (current_seq_p->sfnum[3]=sfnum[3])>0 &&	    (current_seq_p->sfnum[4]=sfnum[4])>0 &&	    (current_seq_p->sfnum[5]=sfnum[5])>0 &&	    (current_seq_p->sfnum[6]=sfnum[6])>0 &&	    (current_seq_p->sfnum[7]=sfnum[7])>0 &&	    (current_seq_p->sfnum[8]=sfnum[8])>0 &&	    (current_seq_p->sfnum[9]=sfnum[9])>0) ;#endif	if ((bp=strchr(libstr,' '))!=NULL) *bp='\0';	strncpy(current_seq_p->libstr,libstr,MAX_UID);	/* get old libstr for lcont>0 */	if (m_msg.term_code && !lcont &&	    m_msg.ldnaseq==SEQT_PROT && aa1ptr[n1-1]!=m_msg.term_code) {	  aa1ptr[n1++]=m_msg.term_code;	  aa1ptr[n1]=0;	}#ifdef DEBUG	if (n_libstr <= MAX_UID) {	  if ((bp=strchr(current_seq_p->libstr,' '))!=NULL) *bp='\0';	}	if (aa1[-1]!='\0' || aa1ptr[n1]!='\0') {	  fprintf(stderr,"%s: aa1[%d] at %ld:%lld  missing NULL boundaries: %d %d\n",		  current_seq_p->libstr,n1,		  m_msg.db.entries+1,current_seq_p->lseek,		  aa1[-1],aa1ptr[n1]);	}#endif	/* check for a continued sequence and provide a pointer to 	   the n1_tot array if lcont || ocont */	n1tot_v += n1;	if (lcont && !ocont) {	/* get a new pointer */	  if (n1tot_cnt <= 0) {	    if ((n1tot_ptr=calloc(1000,sizeof(int)))==NULL) {	      fprintf(stderr," cannot allocate n1tot_ptr\n");	      exit(1);	    }	    else {n1tot_cnt=1000;}	  }	  n1tot_cnt--;	  n1tot_cur = n1tot_ptr++;	}	current_seq_p->n1tot_p = n1tot_cur;	/* skip based on size range */	/*	if (n1tot_v < m_msg.n1_low || n1tot_v > m_msg.n1_high) {	  goto loop2;	}	*/	m_msg.db.entries++;	m_msg.db.length += n1;	if (m_msg.db.length > LONG_MAX) {	  m_msg.db.length -= LONG_MAX; m_msg.db.carry++;	}#ifdef DEBUG	/* This finds most reasons for core dumps */	if (pst.debug_lib)	  for (i=0; i<n1; i++) {	    if (aa1[i]>=pst.nsqx || aa1[i] <= 0) {	      fprintf(stderr,		      "%s residue[%d/%d] %d range (%d) lcont/ocont: %d/%d\n%s\n",		      current_seq_p->libstr,i,current_seq_p->n1,aa1[i],pst.nsqx,lcont,ocont,aa1ptr+i);	      aa1[i]=0;	      n1=i-1;	      break;	    }	  }#endif	/* don't count long sequences more than once */	if (aa1!=aa1ptr) {	/* this is a continuation */	  n1 += m_msg.l_overlap;	  m_msg.db.entries--;	}#ifdef PROGRESS	if (!m_msg.quiet) 	  if (m_msg.db.entries % 200 == 199) {	    fputc('.',stderr);	    if (m_msg.db.entries % 10000 == 9999) fputc('\n',stderr);	    else if (m_msg.db.entries % 1000 == 999) fputc(' ',stderr);	  }#endif	if (n1<=1) {	  /*	if (igncnt++ <10)		fprintf(stderr,"Ignoring: %s\n",current_seq_p->libstr);	  */	  goto loop2;	}	ntbuff += n1+1;	for (itt=m_msg.revcomp; itt<=m_msg.nitt1; itt++) {	  lib_buf2_p->frame = itt;	  lib_buf2_p++;		/* point to next buf2 */	  lib_bhead_p->buf2_cnt++;	  /* point to the current sequence */	  lib_buf2_p->seq = current_seq_p;	} /* for (itt .. */	if (lcont) {	  memcpy(aa1save,&aa1[n1-m_msg.l_overlap],m_msg.l_overlap);	}	/* if the buffer is filled */	if (lib_bhead_p->buf2_cnt >= max_buf2_cnt || ntbuff >= buf_siz - maxn) {	  /* fprintf(stderr," new empty buffer at: %lld\n", current_seq_p->lseek); */#ifdef COMP_THR		/* if COMP_THR - fill and empty buffers */	  /* provide filled buffer to workers */	  lib_bhead_p->have_data = 1;	  put_rbuf(lib_bhead_p,max_work_buf);	  filled_worker_bufs++;	  empty_reader_bufs--;	  /* get an empty buffer to fill */	  get_rbuf(&lib_bhead_p,max_work_buf);	  empty_reader_bufs++;#else			/* just do the searches */	  if (lib_bhead_p->buf2_type & BUF2_DOWORK) {	    buf_do_work(aa0, m_msg.n0, lib_bhead_p, &pst, f_str);	    if (m_msg.qshuffle)	      buf_qshuf_work(aa0s,m_msg.n0, lib_bhead_p, &pst, qf_str, pst.score_ix);	  }	  if (lib_bhead_p->buf2_type & BUF2_DOSHUF) {	    buf_shuf_work(aa0,m_msg.n0, aa1shuff, pst.zs_win,			  lib_bhead_p, &pst, f_str, pst.score_ix);	  }#endif	  /* "empty" buffers have results that must be processed */	  if (lib_bhead_p->buf2_cnt && lib_bhead_p->have_results) {#ifdef COMP_THR	    filled_worker_bufs--;#endif	    save_best(lib_bhead_p,m_msg, &pst, &m_msg.ldb, fdata,m_msg.qsfnum,		      &m_msg.hist, &m_msg.pstat_void );	    /* this section of code is only used for re-cycled buffers */	    if (lib_bhead_p->have_best_save) {	      lib_buf2_p = lib_bhead_p->buf2;	      while (lib_bhead_p->buf2_cnt--) {		if (lib_buf2_p->best_save != NULL) preserve_seq(lib_buf2_p, best_seqs, best);		lib_buf2_p->best_save = NULL;		lib_buf2_p++;	      }	      lib_bhead_p->have_best_save = 0;	    }	  }	  /* now the buffer is truly empty, fill it up */	  lib_bhead_p->buf2_cnt = 0;	  lib_bhead_p->have_results = 0;	  lib_bhead_p->buf2_type = BUF2_DOWORK;	  if (pst.zsflag > 10) 	  lib_bhead_p->buf2_type |= BUF2_DOSHUF;	  lib_buf2_p = lib_bhead_p->buf2;	  current_seq_p = lib_buf2_p->seq = lib_bhead_p->buf2[0].seq_b;	  aa1 = current_seq_p->aa1b;	  ntbuff = 0;	}	else {	/* room left in current buffer, increment ptrs */	  current_seq_p++;	  lib_buf2_p->seq = current_seq_p;	  aa1 = current_seq_p->aa1b = current_seq_p[-1].aa1b+n1+1;	}      loop2: 	if (lcont) {	  maxt = m_msg.maxt3;	  memcpy(aa1,aa1save,m_msg.l_overlap);	  aa1ptr= &aa1[m_msg.l_overlap];	/* aa1ptr is where the next GETLIB sequence goes */					/* aa1 is the beginning of the sequence for do_work() */	  loffset += n1 - m_msg.l_overlap;	/* this must be n1, which is the old value, not current_seq_p->n1 */ 	  ocont = lcont;	}	else {	  maxt = maxn;	  aa1ptr=aa1;	  if (ocont) *n1tot_cur = n1tot_v;	  ocont = 0;	  loffset = 0l;	  n1tot_v = 0;	  n1tot_cur = NULL;	}      } /* end while((n1=getlib())) */    } /* end iln=1..nln */      /* all done */#ifdef COMP_THR    /* check last buffers for any results */    if (lib_bhead_p->buf2_cnt > 0) {      lib_bhead_p->have_data = 1;      put_rbuf(lib_bhead_p,max_work_buf);      empty_reader_bufs--;    }    info_lib_range_p = work_info[0].info_lib_range;    /* wait for the threads to finish */    wait_rbuf(max_work_buf - empty_reader_bufs);    for (i=0, t_reader_buf_readp = reader_buf_readp-1;	 i < num_reader_bufs - empty_reader_bufs ; i++, t_reader_buf_readp--) {      if (t_reader_buf_readp < 0) t_reader_buf_readp = max_work_buf - 1;      t_lib_bhead_p = reader_buf[t_reader_buf_readp];      /*      fprintf(stderr," buf2_cnt[%d]: %d [data/res:%d/%d]\n",	      t_reader_buf_readp,t_lib_bhead_p->buf2_cnt,      	      t_lib_bhead_p->have_data, t_lib_bhead_p->have_results);      */      if (t_lib_bhead_p->buf2_cnt > 0 && t_lib_bhead_p->have_results) {	save_best(t_lib_bhead_p,m_msg, &pst, &m_msg.ldb, fdata,m_msg.qsfnum,		  &m_msg.hist, &m_msg.pstat_void);	t_lib_bhead_p->buf2_cnt = t_lib_bhead_p->have_results = 0;      }    }    /*    fprintf(stderr," reader_buf_readp: %d nbest: %d\n",	    reader_buf_readp,nbest);    */#else    if (lib_bhead_p->buf2_type & BUF2_DOWORK) {      buf_do_work(aa0, m_msg.n0, lib_bhead_p, &pst, f_str);      if (m_msg.qshuffle)	buf_qshuf_work(aa0s,m_msg.n0, lib_bhead_p, &pst, qf_str, pst.score_ix);    }    if (lib_bhead_p->buf2_type & BUF2_DOSHUF) {      buf_shuf_work(aa0, m_msg.n0, aa1shuff, pst.zs_win,		    lib_bhead_p, &pst, f_str, pst.score_ix);    }    if (lib_bhead_p->buf2_cnt > 0) {      save_best(lib_bhead_p,m_msg, &pst, &m_msg.ldb, fdata,m_msg.qsfnum,		&m_msg.hist, &m_msg.pstat_void);    }    lib_bhead_p->buf2_cnt = lib_bhead_p->have_results = 0;#endif#ifdef PROGRESS    if (!m_msg.quiet)      if (m_msg.db.entries >= 200) {fprintf(stderr," Done!\n");}#endif    m_msg.nbr_seq = m_msg.db.entries;    get_param(&pst, info_gstring2p,info_gstring3);    /* *************************** */    /* analyze the last results    */    /* *************************** */    #ifndef SAMP_STATS    if (!stats_done && nstats > 0) {#endif      /* we ALWAYS do this if SAMP_STATS, because the statistics may have changed */      pst.zsflag_f = process_hist(stats,nstats,m_msg, &pst,&m_msg.hist,				  &m_msg.pstat_void,stats_done);      if (m_msg.pstat_void != NULL) {	stats_done = 1;	for (i = 0; i < nbest; i++) {	  bestp_arr[i]->zscore =	    (*find_zp)(bestp_arr[i]->rst.score[pst.score_ix],		       bestp_arr[i]->rst.escore, bestp_arr[i]->seq->n1, 		       bestp_arr[i]->rst.comp, m_msg.pstat_void);	}#ifndef SAMP_STATS      }      else pst.zsflag = -1;#endif    }    /* if there are not many scores, produce better statistics by shuffling */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -