📄 comp_lib2.c
字号:
/* rline[] is a tmp string */ if (m_msg.qdnaseq == SEQT_DNA || m_msg.qdnaseq == SEQT_RNA) { strncpy(rline,(m_msg.qframe==1)? " (forward-only)" : "\0",sizeof(rline)); rline[sizeof(rline)-1]='\0'; } else rline[0]='\0'; leng = (int)strlen(m_msg.qtitle); if (leng > 50) leng -= 10; sprintf (&m_msg.qtitle[leng], " %d %s", m_msg.n0, m_msg.sqnam); m_msg.seqnm = 0; if (m_msg.std_output) {#ifdef COMP_MLIB printf("%3d>>>%s - %d %s%s\n", qlib, m_msg.qtitle, m_msg.n0, m_msg.sqnam, (m_msg.revcomp ? " (reverse complement)" : rline));#else printf("%.50s: %d %s%s\n %s\n", info_qlabel, m_msg.n0, m_msg.sqnam, (m_msg.revcomp ? " (reverse complement)" : rline));#endif /* check for annotation */ if (m_msg.q_ann_flg) { printf("Annotation: "); for (j=0; j<m_msg.n0; j++) { if (m_msg.aa0a[j] && m_msg.ann_arr[m_msg.aa0a[j]] != ' ' ) { printf("|%d:%c%c", j+1,m_msg.ann_arr[m_msg.aa0a[j]],pst.sq[aa0[0][j]]); } } printf("\n"); } printf("Library: %.60s", m_msg.ltitle); fflush(stdout); } n_libstr=MAX_UID; fflush(outfd); tprev = s_time(); if (fdata) fprintf(fdata,">>>%ld %3d\t%-50s\n",qtt.entries,m_msg.n0,m_msg.qtitle); qtt.length += m_msg.n0; qtt.entries++;#ifndef COMP_THR /* initialize the comparison function, returning f_str */ init_work (aa0[0], m_msg.n0, &pst, &f_str[0]); have_f_str=1; f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0]; /* label library size limits */ if (pst.n1_low > 0 && pst.n1_high < BIGNUM) sprintf(info_lib_range," (range: %d-%d)",pst.n1_low,pst.n1_high); else if (pst.n1_low > 0) sprintf(info_lib_range," (range: >%d)",pst.n1_low); else if (pst.n1_high < BIGNUM) sprintf(info_lib_range," (range: <%d)",pst.n1_high); else info_lib_range[0]='\0'; info_lib_range[sizeof(info_lib_range)-1]='\0'; info_lib_range_p = info_lib_range; if (m_msg.qframe == 2) { init_work ( aa0[1], m_msg.n0, &pst, &f_str[1]); } if (m_msg.qshuffle) { init_work ( aa0s, m_msg.n0, &pst, &qf_str); } lib_bhead_p = lib_buf2_list; /* equivalent to un-threaded get_rbuf() */#else /* COMP_THR */ start_thr(); /* now open the library and start reading */ /* get a buffer and fill it up */ get_rbuf(&lib_bhead_p,max_work_buf); empty_reader_bufs++;#endif lib_bhead_p->buf2_cnt = 0; lib_bhead_p->have_results = 0; lib_bhead_p->buf2_type=BUF2_DOWORK; if (pst.zsflag > 10) { lib_bhead_p->buf2_type |= BUF2_DOSHUF; } lib_buf2_p = lib_bhead_p->buf2; ntbuff = 0; /* open the library - start the search */ for (iln = 0; iln < m_msg.nln; iln++) { if ((m_msg.lb_mfd[iln] = m_file_p= openlib(m_msg.lbnames[iln], m_msg.ldnaseq, lascii, !m_msg.quiet, m_msg.lb_mfd[iln])) ==NULL) { fprintf(stderr," cannot open library %s\n",m_msg.lbnames[iln]); continue; } loffset = 0l; lcont = 0; ocont = 0; n1tot_v = n1tot_cnt = 0; n1tot_cur = n1tot_ptr = NULL; /* get next buffer to read into */ maxt = maxn; /* read sequence directly into buffer */ current_seq_p = lib_buf2_p->seq; aa1ptr = aa1 = current_seq_p->aa1b; while ((n1=GETLIB(aa1ptr, maxt, libstr, n_libstr, &lseek, &lcont, m_file_p, &(current_seq_p->l_off)))>=0) { /* check for subset on library*/ if (m_file_p->opt_text[0]!='\0') { if (m_file_p->opt_text[0]=='-') { lstart=0; sscanf(&m_file_p->opt_text[1],"%d",&lstop); } else { sscanf(&m_file_p->opt_text[0],"%d-%d",&lstart,&lstop); lstart--; if (lstop <= 0 ) lstop = BIGNUM; } for (id=0,is=lstart; is<min(n1,lstop); ) aa1[id++]=aa1[is++]; aa1[id]=0; n1 = min(n1,lstop)-lstart; current_seq_p->l_off += lstart; } current_seq_p->n1 = n1; current_seq_p->m_file_p = (void *)m_file_p; current_seq_p->cont = ocont+1; current_seq_p->l_offset = loffset; current_seq_p->lseek = lseek;#ifdef SUPERFAMNUM current_seq_p->nsfnum = nsfnum; if ((current_seq_p->sfnum[0]=sfnum[0])>0 && (current_seq_p->sfnum[1]=sfnum[1])>0 && (current_seq_p->sfnum[2]=sfnum[2])>0 && (current_seq_p->sfnum[3]=sfnum[3])>0 && (current_seq_p->sfnum[4]=sfnum[4])>0 && (current_seq_p->sfnum[5]=sfnum[5])>0 && (current_seq_p->sfnum[6]=sfnum[6])>0 && (current_seq_p->sfnum[7]=sfnum[7])>0 && (current_seq_p->sfnum[8]=sfnum[8])>0 && (current_seq_p->sfnum[9]=sfnum[9])>0) ;#endif if ((bp=strchr(libstr,' '))!=NULL) *bp='\0'; strncpy(current_seq_p->libstr,libstr,MAX_UID); /* get old libstr for lcont>0 */ if (m_msg.term_code && !lcont && m_msg.ldnaseq==SEQT_PROT && aa1ptr[n1-1]!=m_msg.term_code) { aa1ptr[n1++]=m_msg.term_code; aa1ptr[n1]=0; }#ifdef DEBUG if (n_libstr <= MAX_UID) { if ((bp=strchr(current_seq_p->libstr,' '))!=NULL) *bp='\0'; } if (aa1[-1]!='\0' || aa1ptr[n1]!='\0') { fprintf(stderr,"%s: aa1[%d] at %ld:%lld missing NULL boundaries: %d %d\n", current_seq_p->libstr,n1, m_msg.db.entries+1,current_seq_p->lseek, aa1[-1],aa1ptr[n1]); }#endif /* check for a continued sequence and provide a pointer to the n1_tot array if lcont || ocont */ n1tot_v += n1; if (lcont && !ocont) { /* get a new pointer */ if (n1tot_cnt <= 0) { if ((n1tot_ptr=calloc(1000,sizeof(int)))==NULL) { fprintf(stderr," cannot allocate n1tot_ptr\n"); exit(1); } else {n1tot_cnt=1000;} } n1tot_cnt--; n1tot_cur = n1tot_ptr++; } current_seq_p->n1tot_p = n1tot_cur; /* skip based on size range */ /* if (n1tot_v < m_msg.n1_low || n1tot_v > m_msg.n1_high) { goto loop2; } */ m_msg.db.entries++; m_msg.db.length += n1; if (m_msg.db.length > LONG_MAX) { m_msg.db.length -= LONG_MAX; m_msg.db.carry++; }#ifdef DEBUG /* This finds most reasons for core dumps */ if (pst.debug_lib) for (i=0; i<n1; i++) { if (aa1[i]>=pst.nsqx || aa1[i] <= 0) { fprintf(stderr, "%s residue[%d/%d] %d range (%d) lcont/ocont: %d/%d\n%s\n", current_seq_p->libstr,i,current_seq_p->n1,aa1[i],pst.nsqx,lcont,ocont,aa1ptr+i); aa1[i]=0; n1=i-1; break; } }#endif /* don't count long sequences more than once */ if (aa1!=aa1ptr) { /* this is a continuation */ n1 += m_msg.l_overlap; m_msg.db.entries--; }#ifdef PROGRESS if (!m_msg.quiet) if (m_msg.db.entries % 200 == 199) { fputc('.',stderr); if (m_msg.db.entries % 10000 == 9999) fputc('\n',stderr); else if (m_msg.db.entries % 1000 == 999) fputc(' ',stderr); }#endif if (n1<=1) { /* if (igncnt++ <10) fprintf(stderr,"Ignoring: %s\n",current_seq_p->libstr); */ goto loop2; } ntbuff += n1+1; for (itt=m_msg.revcomp; itt<=m_msg.nitt1; itt++) { lib_buf2_p->frame = itt; lib_buf2_p++; /* point to next buf2 */ lib_bhead_p->buf2_cnt++; /* point to the current sequence */ lib_buf2_p->seq = current_seq_p; } /* for (itt .. */ if (lcont) { memcpy(aa1save,&aa1[n1-m_msg.l_overlap],m_msg.l_overlap); } /* if the buffer is filled */ if (lib_bhead_p->buf2_cnt >= max_buf2_cnt || ntbuff >= buf_siz - maxn) { /* fprintf(stderr," new empty buffer at: %lld\n", current_seq_p->lseek); */#ifdef COMP_THR /* if COMP_THR - fill and empty buffers */ /* provide filled buffer to workers */ lib_bhead_p->have_data = 1; put_rbuf(lib_bhead_p,max_work_buf); filled_worker_bufs++; empty_reader_bufs--; /* get an empty buffer to fill */ get_rbuf(&lib_bhead_p,max_work_buf); empty_reader_bufs++;#else /* just do the searches */ if (lib_bhead_p->buf2_type & BUF2_DOWORK) { buf_do_work(aa0, m_msg.n0, lib_bhead_p, &pst, f_str); if (m_msg.qshuffle) buf_qshuf_work(aa0s,m_msg.n0, lib_bhead_p, &pst, qf_str, pst.score_ix); } if (lib_bhead_p->buf2_type & BUF2_DOSHUF) { buf_shuf_work(aa0,m_msg.n0, aa1shuff, pst.zs_win, lib_bhead_p, &pst, f_str, pst.score_ix); }#endif /* "empty" buffers have results that must be processed */ if (lib_bhead_p->buf2_cnt && lib_bhead_p->have_results) {#ifdef COMP_THR filled_worker_bufs--;#endif save_best(lib_bhead_p,m_msg, &pst, &m_msg.ldb, fdata,m_msg.qsfnum, &m_msg.hist, &m_msg.pstat_void ); /* this section of code is only used for re-cycled buffers */ if (lib_bhead_p->have_best_save) { lib_buf2_p = lib_bhead_p->buf2; while (lib_bhead_p->buf2_cnt--) { if (lib_buf2_p->best_save != NULL) preserve_seq(lib_buf2_p, best_seqs, best); lib_buf2_p->best_save = NULL; lib_buf2_p++; } lib_bhead_p->have_best_save = 0; } } /* now the buffer is truly empty, fill it up */ lib_bhead_p->buf2_cnt = 0; lib_bhead_p->have_results = 0; lib_bhead_p->buf2_type = BUF2_DOWORK; if (pst.zsflag > 10) lib_bhead_p->buf2_type |= BUF2_DOSHUF; lib_buf2_p = lib_bhead_p->buf2; current_seq_p = lib_buf2_p->seq = lib_bhead_p->buf2[0].seq_b; aa1 = current_seq_p->aa1b; ntbuff = 0; } else { /* room left in current buffer, increment ptrs */ current_seq_p++; lib_buf2_p->seq = current_seq_p; aa1 = current_seq_p->aa1b = current_seq_p[-1].aa1b+n1+1; } loop2: if (lcont) { maxt = m_msg.maxt3; memcpy(aa1,aa1save,m_msg.l_overlap); aa1ptr= &aa1[m_msg.l_overlap]; /* aa1ptr is where the next GETLIB sequence goes */ /* aa1 is the beginning of the sequence for do_work() */ loffset += n1 - m_msg.l_overlap; /* this must be n1, which is the old value, not current_seq_p->n1 */ ocont = lcont; } else { maxt = maxn; aa1ptr=aa1; if (ocont) *n1tot_cur = n1tot_v; ocont = 0; loffset = 0l; n1tot_v = 0; n1tot_cur = NULL; } } /* end while((n1=getlib())) */ } /* end iln=1..nln */ /* all done */#ifdef COMP_THR /* check last buffers for any results */ if (lib_bhead_p->buf2_cnt > 0) { lib_bhead_p->have_data = 1; put_rbuf(lib_bhead_p,max_work_buf); empty_reader_bufs--; } info_lib_range_p = work_info[0].info_lib_range; /* wait for the threads to finish */ wait_rbuf(max_work_buf - empty_reader_bufs); for (i=0, t_reader_buf_readp = reader_buf_readp-1; i < num_reader_bufs - empty_reader_bufs ; i++, t_reader_buf_readp--) { if (t_reader_buf_readp < 0) t_reader_buf_readp = max_work_buf - 1; t_lib_bhead_p = reader_buf[t_reader_buf_readp]; /* fprintf(stderr," buf2_cnt[%d]: %d [data/res:%d/%d]\n", t_reader_buf_readp,t_lib_bhead_p->buf2_cnt, t_lib_bhead_p->have_data, t_lib_bhead_p->have_results); */ if (t_lib_bhead_p->buf2_cnt > 0 && t_lib_bhead_p->have_results) { save_best(t_lib_bhead_p,m_msg, &pst, &m_msg.ldb, fdata,m_msg.qsfnum, &m_msg.hist, &m_msg.pstat_void); t_lib_bhead_p->buf2_cnt = t_lib_bhead_p->have_results = 0; } } /* fprintf(stderr," reader_buf_readp: %d nbest: %d\n", reader_buf_readp,nbest); */#else if (lib_bhead_p->buf2_type & BUF2_DOWORK) { buf_do_work(aa0, m_msg.n0, lib_bhead_p, &pst, f_str); if (m_msg.qshuffle) buf_qshuf_work(aa0s,m_msg.n0, lib_bhead_p, &pst, qf_str, pst.score_ix); } if (lib_bhead_p->buf2_type & BUF2_DOSHUF) { buf_shuf_work(aa0, m_msg.n0, aa1shuff, pst.zs_win, lib_bhead_p, &pst, f_str, pst.score_ix); } if (lib_bhead_p->buf2_cnt > 0) { save_best(lib_bhead_p,m_msg, &pst, &m_msg.ldb, fdata,m_msg.qsfnum, &m_msg.hist, &m_msg.pstat_void); } lib_bhead_p->buf2_cnt = lib_bhead_p->have_results = 0;#endif#ifdef PROGRESS if (!m_msg.quiet) if (m_msg.db.entries >= 200) {fprintf(stderr," Done!\n");}#endif m_msg.nbr_seq = m_msg.db.entries; get_param(&pst, info_gstring2p,info_gstring3); /* *************************** */ /* analyze the last results */ /* *************************** */ #ifndef SAMP_STATS if (!stats_done && nstats > 0) {#endif /* we ALWAYS do this if SAMP_STATS, because the statistics may have changed */ pst.zsflag_f = process_hist(stats,nstats,m_msg, &pst,&m_msg.hist, &m_msg.pstat_void,stats_done); if (m_msg.pstat_void != NULL) { stats_done = 1; for (i = 0; i < nbest; i++) { bestp_arr[i]->zscore = (*find_zp)(bestp_arr[i]->rst.score[pst.score_ix], bestp_arr[i]->rst.escore, bestp_arr[i]->seq->n1, bestp_arr[i]->rst.comp, m_msg.pstat_void); }#ifndef SAMP_STATS } else pst.zsflag = -1;#endif } /* if there are not many scores, produce better statistics by shuffling */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -