📄 p2_complib2.c
字号:
if ((aa01 = (unsigned char *) malloc ((MAXTST + SEQ_PAD + 1) * sizeof (char))) == NULL) s_abort ("Unable to allocate query sequence", ""); fputs(iprompt0,stdout); fprintf(stdout," %s%s\n",verstr,refstr); /* Query library */ if (m_msg0.tname[0] == '\0') { if (m_msg0.quiet == 1) s_abort("query sequence undefined",""); fprintf(stderr, "Pvcomplib [%s]\n",mp_verstr); l1: fputs (iprompt1, stdout); fflush (stdout); if (fgets (m_msg0.tname, 80, stdin) == NULL) s_abort ("Unable to read query library name",""); if ((bp=strchr(m_msg0.tname,'\n'))!=NULL) *bp='\0'; if (m_msg0.tname[0] == '\0') goto l1; } /* Open query library */ if ((q_file_p= openlib(m_msg0.tname, m_msg0.qdnaseq,qascii,!m_msg0.quiet,NULL))==NULL) { s_abort(" cannot open library ",m_msg0.tname); } /* else { printf ("searching %s library\n",m_msg0.tname); } */ ntt.entries = qtt.entries = 0; ntt.carry = qtt.carry = 0; ntt.length = qtt.length = 0l; /* Fetch first sequence */ qlcont = 0; while (qlib < m_msg0.ql_start) { /* skip through query sequences */ pst.n0 = qm_msg0.n0 = m_msg0.n0 = QGETLIB (aa00, MAXTST, q_bline, sizeof(q_bline), &qseek, &qlcont, q_file_p,&m_msg0.sq0off); strncpy(qm_msg0.libstr,q_bline,sizeof(qm_msg0.libstr)-20); qm_msg0.libstr[sizeof(qm_msg0.libstr)-21]='\0'; if ((bp=strchr(qm_msg0.libstr,' '))!=NULL) *bp='\0'; /* if annotations are included in sequence, remove them */ if (m_msg0.q_ann_flg) { pst.n0 = qm_msg0.n0 = m_msg0.n0 = ann_scan(aa00, m_msg0.n0, &m_msg0, m_msg0.qdnaseq);#ifdef DEBUG fprintf(stderr,"m_msp0->/aa0a is: %o/%o\n",&m_msg0,m_msg0.aa0a);#endif } if (m_msg0.term_code && !(m_msg0.qdnaseq == SEQT_DNA || m_msg0.qdnaseq==SEQT_RNA) && aa00[m_msg0.n0-1]!='*') { aa00[m_msg0.n0++]='*'; aa00[m_msg0.n0]=0; pst.n0 = qm_msg0.n0 = m_msg0.n0; } /* check for subset */ if (q_file_p->opt_text[0]!='\0') { if (q_file_p->opt_text[0]=='-') { sstart=0; sscanf(&q_file_p->opt_text[1],"%d",&sstop); } else { sscanf(&q_file_p->opt_text[0],"%d-%d",&sstart,&sstop); sstart--; if (sstop <= 0 ) sstop = BIGNUM; } for (id=0,is=sstart; is<min(m_msg0.n0,sstop); ) aa00[id++]=aa00[is++]; aa00[id]=0; pst.n0 = qm_msg0.n0 = m_msg0.n0 = min(m_msg0.n0,sstop)-sstart; if (m_msg0.sq0off==1) m_msg0.sq0off = sstart+1; } qlib++; if (m_msg0.n0 <= 0) s_abort ("Unable to fetch sequence from library: ", m_msg0.tname); } qtt.entries=1; qm_msg0.slist = 0; /* now have correct query sequence - check sequence type and reset */ if (m_msg0.qdnaseq == SEQT_UNK) { /* check for DNA sequence */ if (m_msg0.n0 > 20 && (float)scanseq(aa00,m_msg0.n0,"ACGTUNacgtun")/(float)m_msg0.n0>0.85) { pascii = nascii; m_msg0.qdnaseq = SEQT_DNA; } else { /* its protein */ pascii = aascii; m_msg0.qdnaseq = SEQT_PROT; } re_ascii(qascii,pascii); init_ascii(pst.ext_sq_set,qascii,m_msg0.qdnaseq); m_msg0.n0 = recode(aa00,m_msg0.n0,qascii,pst.nsqx); } /* for ALTIVEC, must pad with 15 NULL's */ for (i=0; i<SEQ_PAD+1; i++) {aa00[m_msg0.n0+i]=0;} qtt.length = m_msg0.n0; if (qlib <= 0) { fprintf(stderr," no sequences found in query library\n"); exit(1); } resetp (&m_msg0, &pst); sprintf(tmp_str," %d %s", qm_msg0.n0, q_sqnam); leng = strlen (qm_msg0.libstr); if (leng + strlen(tmp_str) >= sizeof(qm_msg0.libstr)) qm_msg0.libstr[sizeof(qm_msg0.libstr)-strlen(tmp_str)-2] = '\0'; strncat(&qm_msg0.libstr[0],tmp_str, sizeof(qm_msg0.libstr)-strlen(qm_msg0.libstr)-1); qm_msg0.libstr[sizeof(qm_msg0.libstr)-1]='\0'; qm_msg0.seqnm = qlib-1; /* Library */ if (strlen (m_msg0.lname) == 0) { if (m_msg0.quiet == 1) s_abort("library name undefined",""); libchoice(m_msg0.lname, sizeof(m_msg0.lname), &m_msg0); } libselect(m_msg0.lname, &m_msg0); /* Get additional parameters here */ if (!m_msg0.quiet) query_parm (&m_msg0, &pst); last_init(&m_msg0, &pst,nnodes-FIRSTNODE); memcpy(&m_msg1, &m_msg0, sizeof(m_msg0)); /* m_msg0.maxn needs to be set to MAXLIB or MAXTRN, depending on the function - max_tot has the MAXTST + (MAXLIB|MAXTRN) */ if (m_msg0.maxn <= 0) m_msg0.maxn = m_msg0.max_tot - MAXTST; if (m_msg0.maxn < 2 * m_msg0.dupn) m_msg0.maxn = 5*m_msg0.dupn; pst.maxlen = m_msg0.maxn; m_msg0.l_overlap = m_msg0.dupn; m_msg0.maxt3 = m_msg0.maxn-m_msg0.l_overlap; /* ******************** */ /* initial manager code */ /* ******************** */ outfd = stdout; if (m_msg0.outfile[0]!='\0') { if ((outfd = fopen(m_msg0.outfile,"w"))==NULL) { fprintf(stderr, "cannot open %s for output\n", m_msg0.outfile); outfd = stdout; } } /* Label the output */ printf("Query library %s vs %s library\n", m_msg0.tname, m_msg0.lname); /* Allocate space for saved scores */ if ((best = (struct beststr *)malloc((MAX_BEST+1)*sizeof(struct beststr)))==NULL) s_abort ("Cannot allocate best struct",""); if ((bptr = (struct beststr **)malloc((MAX_BEST+1)*sizeof(struct beststr *)))==NULL) s_abort ("Cannot allocate bptr",""); /* Initialize bptr */ for (nbest = 0; nbest < MAX_BEST+1; nbest++) bptr[nbest] = &best[nbest]; best++; bptr++; best[-1].rst.score[0]=best[-1].rst.score[1]=best[-1].rst.score[2]=INT_MAX; best[-1].zscore = FLT_MAX; best[-1].rst.escore = FLT_MIN; best_flag = 0; if ((stats = (struct stat_str *)calloc((size_t)MAX_STATS,sizeof(struct stat_str))) ==NULL) s_abort ("Cannot allocate stats struct",""); nstats = 0; /* Now open the second library, divide it, send sequences to all workers */ /* Set up buffer for reading the library: We will start by using a 2 Mbyte buffer for each worker. For proteins, that means 5,000 sequences of length 400 (average). For DNA, that means 2,000 sequences of length 1000. At the moment, those are good averages. */ if (max_buf_cnt <= 0) { if (m_msg0.ldnaseq== SEQT_DNA) { ave_seq_len = AVE_NT_LEN; max_buf_cnt = DEF_WORKER_BUF/AVE_NT_LEN; } else { ave_seq_len = AVE_AA_LEN; max_buf_cnt = DEF_WORKER_BUF/AVE_AA_LEN; } } /* however - buffer sizes should be a function of the number of workers so that all the workers are kept busy. Assuming a 10,000 entry library is the smallest we want to schedule, then */ if (max_buf_cnt > 10000/(nnodes-FIRSTNODE)) max_buf_cnt = 10000/(2*(nnodes-FIRSTNODE)); /* allocate space for sequence buffers */ m_msg0.pbuf_siz=max_buf_cnt*ave_seq_len; if (m_msg0.pbuf_siz < 5*m_msg0.maxn) m_msg0.pbuf_siz = 5*m_msg0.maxn;#ifdef PVM_SRC #ifdef ROUTE_DIRECT pvm_setopt(PvmRoute,PvmRouteDirect);#endif pvm_initsend(PvmDataRaw); pvm_pkint(&nnodes,1,1); pvm_pkint(pinums,nnodes,1); pvm_pkbyte((char *)&m_msg0,(int)sizeof(m_msg0),1); for (node = FIRSTNODE; node<nnodes; node++) if (pvm_send(pinums[node],STARTTYPE0)<0) { pvm_perror("pvm_send1"); pvm_exit(); exit(1); }#endif#ifdef MPI_SRC for (node = FIRSTNODE; node<nnodes; node++) { MPI_Send(&m_msg0,(int)sizeof(m_msg0),MPI_BYTE,node,STARTTYPE0, MPI_COMM_WORLD); }#endif /* now send pst, sascii */#ifdef PVM_SRC pvm_initsend(PvmDataRaw); pvm_pkbyte((char *)&pst,(int)sizeof(pst),1); pvm_pkbyte((char *)pascii,(int)sizeof(aascii),1); for (node = FIRSTNODE; node< nnodes; node++) pvm_send(pinums[node],STARTTYPE1); /* send pam12 */ pvm_initsend(PvmDataRaw); pvm_pkint(pam12,m_msg0.pamd1*m_msg0.pamd2,1); for (node = FIRSTNODE; node< nnodes; node++) pvm_send(pinums[node],STARTTYPE2); /* send pam12x */ pvm_initsend(PvmDataRaw); pvm_pkint(pam12x,m_msg0.pamd1*m_msg0.pamd2,1); for (node = FIRSTNODE; node< nnodes; node++) pvm_send(pinums[node],STARTTYPE3);#endif#ifdef MPI_SRC for (node=FIRSTNODE; node < nnodes; node++) { MPI_Send(&pst,(int)sizeof(pst),MPI_BYTE,node,STARTTYPE1, MPI_COMM_WORLD); MPI_Send(pascii,(int)sizeof(aascii),MPI_BYTE,node,STARTTYPE1, MPI_COMM_WORLD); MPI_Send(pam12,m_msg0.pamd1*m_msg0.pamd2,MPI_INT,node,STARTTYPE2, MPI_COMM_WORLD); MPI_Send(pam12x,m_msg0.pamd1*m_msg0.pamd2,MPI_INT,node,STARTTYPE3, MPI_COMM_WORLD); }#endif if ((n1_arr = (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int))) ==NULL) { fprintf(stderr," cannot allocate n1_arr %d\n",max_buf_cnt+1); s_abort(" cannot allocate n1_arr",""); exit(1); } if ((aa1i_arr = (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int))) ==NULL) { fprintf(stderr," cannot allocate aa1i_arr %d\n",max_buf_cnt+1); s_abort(" cannot allocate aa1i_arr",""); exit(1); } if ((m_seqnm_arr= (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int))) ==NULL) { fprintf(stderr," cannot allocate m_seqnm_arr %d\n",max_buf_cnt+1); s_abort(" cannot allocate m_seqnm_arr",""); exit(1); } if ((aa1_buf = (unsigned char *)calloc((size_t)(m_msg0.pbuf_siz),sizeof(unsigned char))) ==NULL) { s_abort(" cannot allocate library buffer %d",""); exit(1); } /* also allocate space for descriptions. Assume max of 250,000 sequences/ worker for now */ /* max_sql is the maximum number of library sequences that can be stored */ max_sql = MAXSQL; if ((ldes=(struct seq_record *)calloc(max_sql,sizeof(struct seq_record)))==NULL) { fprintf(stderr," failure to allocate ldes(%d) %ld\n", max_sql,max_sql*sizeof(struct seq_record)); s_abort("cannot allocate ldes",""); exit(1); } max_bline_b = MAXSQL * (m_msg0.aln.llen+1)/4; bline_inc = m_msg0.aln.llen; if (m_msg0.markx & MX_M9SUMM) bline_inc += 40; i = 4; while (i-- > 0) { if ((bline_buf=(char *)calloc(max_bline_b,sizeof(char)))!=NULL) break; max_bline_b /= 2; bline_inc /= 2; } if (bline_buf == NULL) { fprintf(stderr," failure to allocate bline_buf(%d) %d\n", max_sql,max_bline_b); s_abort(" cannot allocate bline_buf",""); } bline_bufp = bline_buf; bline_buf_mx = bline_buf+max_bline_b; /* the code for filling the buffers is copied from comp_thr.c */ /* the major differences reflect the fact that all library descriptions will be kept in memory, indexed by sequence number. As a result, one buffer is filled by this loop - ldes[] has the descriptive information for every sequence this array could potentially be quite large */ /* now open the library and start reading */ /* get a buffer and fill it up */ ntbuff = 0; m_seqnm = 0; /* m_seqnm is the number of this library sequence */ nseq = 0; node = FIRSTNODE; /* sqs2_buf[0].aa1 = aa1_buf; */ aa1 = aa1_buf; /* iln counts through each library */ for (iln = 0; iln < m_msg0.nln; iln++) { if ((l_file_p= openlib(m_msg0.lbnames[iln], m_msg0.ldnaseq,lascii,!m_msg0.quiet,NULL))==NULL) { fprintf(stderr," cannot open library %s\n",m_msg0.lbnames[iln]); continue; } loffset = 0l; lcont = 0; ocont = 0; n1tot_v = n1tot_cnt = 0; n1tot_cur = n1tot_ptr = NULL; maxt = m_msg0.maxn; /* read sequence directly into buffer */ aa1ptr = aa1; /* = sqs2_buf[0].aa1; */ while ((n1= LGETLIB(aa1ptr,maxt,t_bline,sizeof(t_bline),&lseek,&lcont, l_file_p,&l_off))>=0) { /* skip sequences outside range */ if (n1 < m_msg0.n1_low || n1 > m_msg0.n1_high) goto loop1; /* add termination code for proteins, if asked */ if (m_msg0.term_code && !lcont && m_msg0.ldnaseq==SEQT_PROT && aa1ptr[n1-1]!=m_msg0.term_code) { aa1ptr[n1++]=m_msg0.term_code; aa1ptr[n1]=0; } /* check for a continued sequence and provide a pointer to the n1_tot array if lcont || ocont */ n1tot_v += n1; if (lcont && !ocont) { /* get a new pointer */ if (n1tot_cnt <= 0) { if ((n1tot_ptr=calloc(1000,sizeof(int)))==NULL) { fprintf(stderr," cannot allocate n1tot_ptr\n"); exit(1); } else {n1tot_cnt=1000;} } n1tot_cnt--; n1tot_cur = n1tot_ptr++; } if (bline_bufp + bline_inc > bline_buf_mx) { i = 4; while (i-- > 0) { if ((bline_buf=(char *)calloc(max_bline_b,sizeof(char)))!=NULL) break; fprintf(stderr," failure to allocate bline_buf(%d) %d\n", max_sql,max_bline_b); max_bline_b /= 2; bline_inc /= 2; } if (bline_buf != NULL) { bline_bufp = bline_buf; bline_buf_mx = bline_buf+max_bline_b; } else { s_abort("cannot allocate bline_buf ",""); exit(1); } } if (bline_bufp+bline_inc < bline_buf_mx ) { strncpy(bline_bufp,t_bline,bline_inc); ldes[m_seqnm].bline = bline_bufp; bline_bufp[bline_inc]= '\0'; bline_bufp += bline_inc+1; } else { fprintf(stderr," bline_buf overrun\n"); } ntt.entries++; /* inc number of sequences */ ntt.length += n1; /* update total library length */ if (ntt.length > LONG_MAX) {ntt.length -= LONG_MAX; ntt.carry++;}#ifdef DEBUG /* This discovers most reasons for core dumps */ if (pst.debug_lib) for (i=0; i<n1; i++) if (aa1[i]>pst.nsq) { fprintf(stderr, "%s residue[%d/%d] %d range (%d) lcont/ocont: %d/%d\n%s\n", qm_msg0.libstr,i,n1,aa1[i],pst.nsq,lcont,ocont,aa1ptr+i); aa1[i]=0; n1=i-1; break; }#endif /* for ALTIVEC, must pad with 15 NULL's */ for (i=0; i<SEQ_PAD+1; i++) {aa1ptr[n1+i]=0;} /* don't count long sequences more than once */ if (aa1!=aa1ptr) { n1 += m_msg0.l_overlap; ntt.entries--; } if (n1>1) { seq_p = &ldes[m_seqnm]; aa1i_arr[nseq] = (int)(aa1-aa1_buf); m_seqnm_arr[nseq] = m_seqnm; seq_p->n1 = n1_arr[nseq] = n1; seq_p->n1tot_p = n1tot_cur; seq_p->lseek = lseek; seq_p->l_offset = loffset+l_off; seq_p->cont = ocont; seq_p->wrkr = node; seq_p->m_seqnm = m_seqnm;#ifdef SUPERFAMNUM seq_p->nsfnum = nsfnum; if ((seq_p->sfnum[0]=sfnum[0])>0 &&
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -