📄 dropfs2.c
字号:
free(f_str->priors); free(f_str); *f_arg = NULL; }}void do_fasts (const unsigned char *aa0, const int n0, const unsigned char *aa1, const int n1, struct pstruct *ppst, struct f_struct *f_str, struct rstruct *rst, int *hoff, int opt_prob, int maxsav){ int nd; /* diagonal array size */ int lhval; int kfact; register struct dstruct *dptr; register int tscor; register struct dstruct *diagp; struct dstruct *dpmax; register int lpos; int tpos; struct savestr *vmptr, *vmaxmax; int scor, tmp; int im, ib, nsave, i; int cmps (); /* comparison routine for ksort */ int ktup; int doffset; vmaxmax = &f_str->vmax[maxsav]; ktup = ppst->param_u.fa.ktup; if (n1 < ktup) { rst->score[0] = rst->score[1] = rst->score[2] = 0; rst->escore = 1.0; rst->segnum = 0; rst->seglen = 0; return; } if (n0+n1+1 >= MAXDIAG) { fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1); rst->score[0] = rst->score[1] = rst->score[2] = -1; rst->escore = 2.0; rst->segnum = 0; rst->seglen = 0; return; } nd = n0 + n1; dpmax = &f_str->diag[nd]; for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;) { dptr->stop = -1; dptr->dmax = NULL; dptr++->score = 0; } for (vmptr = f_str->vmax; vmptr < vmaxmax; vmptr++) { vmptr->score = 0; vmptr->exact = 0; } f_str->lowmax = f_str->vmax; f_str->lowscor = 0; /* start hashing */ diagp = &f_str->diag[f_str->noff]; for (lhval=lpos=0; lpos < n1; lpos++, diagp++) { if (ppst->hsq[aa1[lpos]]>=NMAP) { /* skip residue */ lpos++ ; diagp++; while (lpos < n1 && ppst->hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;} if (lpos >= n1) break; lhval = 0; } lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]]; for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) { dptr = &diagp[-tpos]; if (f_str->l_end[tpos]) { if (dptr->score + f_str->pamh1[aa0[tpos]] == f_str->aa0s[tpos]) { dptr->stop = lpos; dptr->score = f_str->aa0s[tpos]; savemax(dptr, f_str, maxsav, 1, tpos); dptr->dmax = NULL; } else if (dptr->score + f_str->pamh1[aa0[tpos]] > f_str->aa0s[tpos]) { /* fprintf(stderr,"exact match score too high: %d:%d %d < %d + %d - %d:%d - %d > %d\n", tpos, lpos, f_str->aa0s[tpos],dptr->score, f_str->pamh1[aa0[tpos]], dptr->start, dptr->stop, dptr->stop - dptr->start, f_str->aa0l[tpos]); */ dptr->stop = lpos; dptr->start = lpos - f_str->aa0l[tpos]; dptr->score = f_str->aa0s[tpos]; savemax(dptr, f_str, maxsav, 1, tpos); dptr->dmax = NULL; } } else if ((tscor = dptr->stop) >= 0) { tscor++; /* tscor is stop of current, increment it */ if ((tscor -= lpos) <= 0) { /* tscor, the end of the current match, is before lpos, so there is a mismatch - this is also the mismatch cost */ tscor *= 2; scor = dptr->score; /* save the run score on the diag */ if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && f_str->lowscor < scor) { /* if what we will get (tscor + kfact) is < 0 and the score is better than the worst savemax() score, save it */ savemax (dptr, f_str, maxsav,0,-1); } /* if extending is better than starting over, extend */ if ((tscor += scor) >= kfact) { dptr->score = tscor; dptr->stop = lpos; if (f_str->l_end[tpos]) { if (dptr->score == f_str->aa0s[tpos]) { savemax(dptr, f_str, maxsav,1,tpos); dptr->dmax = NULL; } else if (dptr->score > f_str->lowscor) savemax(dptr, f_str, maxsav,0,tpos); } } else { /* otherwise, start new */ dptr->score = kfact; dptr->start = dptr->stop = lpos; } } else { /* tscor is after lpos, so extend one residue */ dptr->score += f_str->pamh1[aa0[tpos]]; dptr->stop = lpos; if (f_str->l_end[tpos]) { if (dptr->score == f_str->aa0s[tpos]) { savemax(dptr, f_str, maxsav,1,tpos); dptr->dmax = NULL; } else if (dptr->score > f_str->lowscor) savemax(dptr, f_str, maxsav,0,tpos); } } } else { /* start new */ dptr->score = f_str->pamh2[lhval]; dptr->start = dptr->stop = lpos; } } /* end tpos */ } /* end lpos */ for (dptr = f_str->diag; dptr < dpmax;) { if (dptr->score > f_str->lowscor) savemax (dptr, f_str, maxsav,0,-1); dptr->stop = -1; dptr->dmax = NULL; dptr++->score = 0; } f_str->ndo = nd;/* at this point all of the elements of aa1[lpos] have been searched for elements of aa0[tpos] with the results in diag[dpos]*/ for (nsave=0, vmptr=f_str->vmax; vmptr< vmaxmax; vmptr++) { if (vmptr->score > 0) { /* fprintf(stderr,"%c 0: %4d-%4d 1: %4d-%4d dp: %d score: %d", (vmptr->exact ? 'x' : ' '), f_str->noff+vmptr->start-vmptr->dp, f_str->noff+vmptr->stop-vmptr->dp, vmptr->start,vmptr->stop, vmptr->dp,vmptr->score); */ vmptr->score = spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str); /* fprintf(stderr," sscore: %d %d-%d\n",vmptr->score,vmptr->start,vmptr->stop); */ if (vmptr->score > 0) f_str->vptr[nsave++] = vmptr; } } if (nsave <= 0) { rst->score[0] = rst->score[1] = rst->score[2] = 0; rst->escore = 1.0; rst->segnum = 0; rst->seglen = 0; f_str->nsave = 0; return; } /* fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff); for (ib=0; ib<nsave; ib++) { fprintf(stderr,"%c 0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n", f_str->vptr[ib]->exact ? 'x' : ' ', f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp, f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp, f_str->vptr[ib]->start,f_str->vptr[ib]->stop, f_str->vptr[ib]->dp,f_str->vptr[ib]->score); } fprintf(stderr,"---\n"); */ kssort(f_str->vptr,nsave); /* make certain each seg is used only once */ for (ib=0; ib<f_str->nm0; ib++) f_str->nm_u[ib]=0; for (ib=0; ib < nsave; ib++) { doffset = f_str->vptr[ib]->dp - f_str->noff; tpos=f_str->aa0i[f_str->vptr[ib]->start - doffset]; if (f_str->nm_u[tpos] == 0) { f_str->nm_u[tpos]=1; } else { f_str->vptr[ib]->score = -1; } } kssort(f_str->vptr,nsave); for (ib = nsave-1; ib >= 0; ib--) { if (f_str->vptr[ib]->score > -1) break; } nsave = ib+1;#ifdef DEBUG /* for (ib = 0; ib < nsave; ib++) { if (f_str->vptr[ib]->score > 1000) { fprintf(stderr," score[%d] too high: %d\n",ib, f_str->vptr[ib]->score); for (i=0; i< 10; i++) { fprintf(stderr, "%c:%d ",ppst->sq[aa1[i]],aa1[i]); } fprintf(stderr,"\n"); f_str->vptr[ib]->score = 0; } } */#endif scor = sconn (f_str->vptr, nsave, f_str, rst, ppst, aa0, n0, aa1, n1, opt_prob); if (rst->escore < 0.0) rst->escore = 2.0; kssort(f_str->vptr,nsave); /* here we should use an nsave that is consistent with sconn and nm0 */ f_str->nsave = nsave; if (nsave > f_str->nm0) f_str->nsave = f_str->nm0; rst->score[1] = f_str->vptr[0]->score; rst->score[0] = rst->score[2] = max(scor, f_str->vptr[0]->score);}void do_work (const unsigned char *aa0, const int n0, const unsigned char *aa1, const int n1, int frame, struct pstruct *ppst, struct f_struct *f_str, int qr_flg, struct rstruct *rst){ int opt_prob; int hoff, n10, i; if (qr_flg==1 && f_str->shuff_cnt <= 0) { rst->escore = 2.0; rst->score[0]=rst->score[1]=rst->score[2]= -1; return; } if (f_str->dotat || ppst->zsflag == 4 || ppst->zsflag == 14 ) opt_prob=1; else opt_prob = 0; if (ppst->zsflag == 2 || ppst->zsflag == 12) opt_prob = 0; if (qr_flg) { opt_prob=1; /* if (frame==1) */ f_str->shuff_cnt--; } if (n1 < ppst->param_u.fa.ktup) { rst->score[0] = rst->score[1] = rst->score[2] = -1; rst->escore = 2.0; return; }#ifdef TFAST n10=aatran(aa1,f_str->aa1x,n1,frame); if (ppst->debug_lib) for (i=0; i<n10; i++) if (f_str->aa1x[i]>ppst->nsq) { fprintf(stderr, "residue[%d/%d] %d range (%d)\n",i,n1, f_str->aa1x[i],ppst->nsq); f_str->aa1x[i]=0; n10=i-1; } do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, opt_prob, f_str->maxsav);#else /* FASTA */ do_fasts (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, opt_prob, f_str->maxsav);#endif rst->comp = rst->H = -1.0;}void do_opt (const unsigned char *aa0, const int n0, const unsigned char *aa1, const int n1, int frame, struct pstruct *ppst, struct f_struct *f_str, struct rstruct *rst){ int lag, tscore, hoff, n10;#ifdef TFAST n10=aatran(aa1,f_str->aa1x,n1,frame); do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, 1, f_str->maxsav);#else /* FASTA */ do_fasts(aa0,n0,aa1,n1,ppst,f_str,rst, &hoff, 1, f_str->maxsav);#endif}/* modify savemax() so that full length 100% matches are marked so that they cannot be removed - if we have a 100% match, mark "exact" modify savemax() to split alignments that include a comma*//* savemax(dptr, f_str, maxsav) takes a current diagonal run (saved in dptr), and places it in the set of runs to be saved (in f_str->vmax[])*/void savemax (struct dstruct *dptr, struct f_struct *f_str, int maxsav, int exact, int tpos){ register int dpos; /* position along the diagonal, -n0 .. n1 */ int i, j, lowj; register struct savestr *vmptr; struct savestr *vmaxmax; vmaxmax = &f_str->vmax[maxsav]; dpos = (int) (dptr - f_str->diag); /* current diagonal *//* check to see if this is the continuation of a run that is already saved *//* if we are at the end of the query, save it regardless *//* if (t_end > 0 && t_end < dptr->stop - dptr->start) {return;} */ if ((vmptr = dptr->dmax) != NULL /* have an active run */ && vmptr->dp == dpos && /* on the correct diagonal */ vmptr->start == dptr->start) { /* and it starts at the same place */ vmptr->stop = dptr->stop; /* update the end of the match in vmax[] */ if (exact == 1) { /* fprintf(stderr,"have cont exact match: %d - %d:%d %d:%d = %d\n", dptr->score, dptr->start, dptr->stop, vmptr->start, vmptr->stop, dptr->stop - dptr->start+1); */ exact = 1; }/* if the score is worse, don't update, return - if the score gets bad enough, it will restart in the diagonal scan */ if ((i = dptr->score) <= vmptr->score) { return;} /* score is better, update */ vmptr->score = i; vmptr->exact = exact;/* if the score is not the worst, return */ if (vmptr != f_str->lowmax) { return;} } else { /* not a continuation */ /* save in the lowest place */ /* fprintf(stderr," Replacing: %d - %d:%d => %d - %d:%d", f_str->lowmax->score, f_str->lowmax->start, f_str->lowmax->stop, dptr->score, dptr->start, dptr->stop); */ vmptr = f_str->lowmax; /* if (exact == 1) { fprintf(stderr,"have new exact match: %d - %d:%d = %d\n", dptr->score, dptr->start, dptr->stop, dptr->stop - dptr->start+1); } */ vmptr->exact = exact; i = vmptr->score = dptr->score; /* 'i' is used as a bound */ vmptr->dp = dpos; vmptr->start = dptr->start; vmptr->stop = dptr->stop; dptr->dmax = vmptr; } /* rescan the list for the worst score */ for (vmptr = f_str->vmax; vmptr < &f_str->vmax[maxsav] ; vmptr++) { if (vmptr->score < i && !vmptr->exact) { i = vmptr->score; f_str->lowmax = vmptr; } } f_str->lowscor = i;}/* this version of spam scans the diagonal to find the best local score, then resets the boundaries for a global alignment and re-scans *//* NOOVERHANG allows one to score any overhanging alignment as zero. Useful for SAGE alignments. Normally, one allows overhangs because of the possibility of partial sequences.*/#undef NOOVERHANG/* May, 2005 - spam() has an intesting bug that occurs when two peptides match in order, separated by one position (the comma). In this case, spam() splits the match, and only returns the better of the two matches. So, if spam splits an alignment at a comma, it needs the ability to insert the missing match.*/int spam (const unsigned char *aa0, const unsigned char *aa1,int n1, struct savestr *dmax, int **pam2, struct f_struct *f_str){ int lpos, doffset; int tot, mtot; struct { int start, stop, score; } curv, maxv; register const unsigned char *aa0p, *aa1p; curv.start = dmax->start; aa1p = &aa1[dmax->start]; doffset = dmax->dp - f_str->noff; aa0p = &aa0[dmax->start - doffset]; tot = curv.score = maxv.score = 0; for (lpos = dmax->start; lpos <= dmax->stop; lpos++) { tot += pam2[*aa0p++][*aa1p++]; if (tot > curv.score) { curv.stop = lpos; /* here, curv.stop is actually curv.max */ curv.score = tot; } else if (tot < 0) { if (curv.score > maxv.score) { maxv.start = curv.start; maxv.stop = curv.stop; maxv.score = curv.score; } tot = curv.score = 0; curv.start = lpos+1; } } if (curv.score > maxv.score) { maxv.start = curv.start; maxv.stop = curv.stop; maxv.score = curv.score; } if (maxv.score <= 0) return 0; /* now, reset the boundaries of the alignment using aa0b[] and aa0e[], which specify the residues that start and end the segment */ maxv.start = f_str->aa0b[maxv.stop-doffset] + doffset; if (maxv.start < 0) { maxv.start = 0;#ifdef NOOVERHANG return 0;#endif } maxv.stop = f_str->aa0e[maxv.stop-doffset] + doffset; if (maxv.stop > n1) { maxv.stop = n1-1;#ifdef NOOVERHANG return 0;#endif } aa1p = &aa1[lpos = maxv.start]; aa0p = &aa0[lpos - doffset]; for (tot=0; lpos <= maxv.stop; lpos++) { tot += pam2[*aa0p++][*aa1p++]; } maxv.score = tot;/* if (maxv.start != dmax->start || maxv.stop != dmax->stop) printf(" new region: %3d %3d %3d %3d\n",maxv.start, dmax->start,maxv.stop,dmax->stop);*/ dmax->start = maxv.start; dmax->stop = maxv.stop; return maxv.score;}int sconn (struct savestr **v, int n, struct f_struct *f_str, struct rstruct *rst, struct pstruct *ppst, const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int opt_prob){ int i, si, cmpp (); struct slink *start, *sl, *sj, *so, *sarr; int lstart, ltmp, tstart, plstop, ptstop, ptstart, tstop; double tatprob; int dotat; sarr = f_str->sarr; /* sort the score left to right in lib pos */ kpsort (v, n); start = NULL; rst->score[0] = 0; rst->escore = 2.0;/* for the remaining runs, see if they fit *//* lstart/lstop -> start/stop in library sequence tstart/tstop -> start/stop in query sequence plstart/plstop ->*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -