📄 dropff2.c
字号:
/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia *//* - dropffa.c,v 1.1.1.1 1999/10/22 20:55:59 wrp Exp *//* this code implements the "fastf" algorithm, which is designed to deconvolve mixtures of protein sequences derived from mixed-peptide Edman sequencing. The expected input is: >test | 40001 90043 | mgstm1 MGCEN, MIDYP, MLLAY, MLLGY Where the ','s indicate the length/end of the sequencing cycle data. Thus, in this example, the sequence is from a mixture of 4 peptides, M was found in the first position, G,I, and L(2) at the second, C,D, L(2) at the third, etc. Because the sequences are derived from mixtures, there need not be any partial sequence "MGCEN", the actual deconvolved sequence might be "MLDGN".*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <stdlib.h>#include <math.h>#include <ctype.h>#include "defs.h"#include "param.h"#include "structs.h"#include "tatstats.h"#define EOSEQ 0 #define ESS 49#define MAXHASH 32#define NMAP MAXHASH+1#define NMAP_X 23 /* re-code NMAP for 'X' */#define NMAP_Z 24 /* re-code NMAP for '*' */#ifndef MAXSAV#define MAXSAV 10#endif#define DROP_INTERN#include "drop_func.h"static char *verstr="4.21 May 2006 (ajm/wrp)";int shscore(unsigned char *aa0, const int n0, int **pam2, int nsq);#ifdef TFASTextern int aatran(const unsigned char *ntseq, unsigned char *aaseq, const int maxs, const int frame);#endifstruct hlstr { int next, pos;};void savemax(struct dstruct *, struct f_struct *);static int m0_spam(unsigned char *, const unsigned char *, int, struct savestr *, int **, struct f_struct *);static int m1_spam(unsigned char *, int, const unsigned char *, int, struct savestr *, int **, int, struct f_struct *);int sconn(struct savestr **v, int nsave, int cgap, struct f_struct *, struct rstruct *, struct pstruct *, const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int opt_prob);void kpsort(struct savestr **, int);void kssort(struct savestr **, int);void kpsort(struct savestr **, int);intsconn_a(unsigned char *, int, int, struct f_struct *, struct a_res_str *);/* initialize for fasta */voidinit_work (unsigned char *aa0, int n0, struct pstruct *ppst, struct f_struct **f_arg){ int mhv, phv; int hmax; int i0, ii0, hv; struct f_struct *f_str; int maxn0; int i, j, q; struct savestr *vmptr; int *res; f_str = (struct f_struct *) calloc(1, sizeof(struct f_struct)); if(f_str == NULL) { fprintf(stderr, "Couldn't calloc f_str\n"); exit(1); } ppst->sw_flag = 0; /* fastf3 cannot work with lowercase symbols as low complexity; thus, NMAP must be disabled; this depends on aascii['X'] */ if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;} if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;} /* this does not work for share ppst structs, as in threads */ /*else {fprintf(stderr," cannot find 'X'==NMAP\n");} */ for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++) if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv) mhv = ppst->hsq[i0]; if (mhv <= 0) { fprintf (stderr, " maximum hsq <=0 %d\n", mhv); exit (1); } for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;/* kshft = 2; */ hmax = hv = (1 << f_str->kshft); f_str->hmask = (hmax >> f_str->kshft) - 1; if ((f_str->aa0 = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) { fprintf (stderr, " cannot allocate f_str->aa0 array; %d\n",n0+1); exit (1); } for (i=0; i<n0; i++) f_str->aa0[i] = aa0[i]; aa0 = f_str->aa0; if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) { fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1); exit (1); } f_str->aa0ix = 0; if ((f_str->harr = (struct hlstr *) calloc (hmax, sizeof (struct hlstr))) == NULL) { fprintf (stderr, " cannot allocate hash array; hmax: %d hmask: %d\n", hmax,f_str->hmask); exit (1); } if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) { fprintf (stderr, " cannot allocate pamh1 array\n"); exit (1); } if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) { fprintf (stderr, " cannot allocate pamh2 array\n"); exit (1); } if ((f_str->link = (struct hlstr *) calloc (n0, sizeof (struct hlstr))) == NULL) { fprintf (stderr, " cannot allocate hash link array"); exit (1); } for (i0 = 0; i0 < hmax; i0++) { f_str->harr[i0].next = -1; f_str->harr[i0].pos = -1; } for (i0 = 0; i0 < n0; i0++) { f_str->link[i0].next = -1; f_str->link[i0].pos = -1; } /* encode the aa0 array */ /* this code has been modified to allow for mixed peptide sequences aa0[] = 5 8 9 3 4 NULL 5 12 3 7 2 NULL the 'NULL' character resets the hash position counter, to indicate that any of several residues can be in the same position. We also need to keep track of the number of times this has happened, so that we can redivide the sequence later i0 counts through the sequence ii0 counts through the hashed sequence */ f_str->nm0 = 1; f_str->nmoff = -1; phv = hv = 0; for (i0= ii0 = 0; i0 < n0; i0++, ii0++) { /* reset the counter and start hashing again */ if (aa0[i0] == ESS || aa0[i0] == 0) { aa0[i0] = 0; /* set ESS to 0 */ /* fprintf(stderr," converted ',' to 0\n");*/ i0++; /* skip over the blank */ f_str->nm0++; if (f_str->nmoff < 0) f_str->nmoff = i0; phv = hv = 0; ii0 = 0; } hv = ppst->hsq[aa0[i0]]; f_str->link[i0].next = f_str->harr[hv].next; f_str->link[i0].pos = f_str->harr[hv].pos; f_str->harr[hv].next = i0; f_str->harr[hv].pos = ii0; f_str->pamh2[hv] = ppst->pam2[0][aa0[i0]][aa0[i0]]; } if (f_str-> nmoff < 0) f_str->nmoff = n0;#ifdef DEBUG /* fprintf(stderr," nmoff: %d/%d nm0: %d\n", f_str->nmoff, n0,f_str->nm0); */#endif/*#ifdef DEBUG fprintf(stderr," hmax: %d\n",hmax); for ( hv=0; hv<hmax; hv++) fprintf(stderr,"%2d %c %3d %3d\n",hv, (hv > 0 && hv < ppst->nsq ) ? ppst->sq[ppst->hsq[hv]] : ' ', f_str->harr[hv].pos,f_str->harr[hv].next); fprintf(stderr,"----\n"); for ( hv=0; hv<n0; hv++) fprintf(stderr,"%2d: %3d %3d\n",hv, f_str->link[hv].pos,f_str->link[hv].next);#endif*/ f_str->maxsav = MAXSAV; if ((f_str->vmax = (struct savestr *) calloc(MAXSAV,sizeof(struct savestr)))==NULL) { fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav); exit(1); } if ((f_str->vptr = (struct savestr **) calloc(MAXSAV,sizeof(struct savestr *)))==NULL) { fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav); exit(1); } for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) { vmptr->used = (int *) calloc(n0, sizeof(int)); if(vmptr->used == NULL) { fprintf(stderr, "Couldn't alloc vmptr->used\n"); exit(1); } }/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the pam2[0][0] is now undefined for consistency with blast*/ for (i0 = 1; i0 <= ppst->nsq; i0++) f_str->pamh1[i0] = ppst->pam2[0][i0][i0]; ppst->param_u.fa.cgap = shscore(aa0,f_str->nmoff-1,ppst->pam2[0],ppst->nsq)/3; if (ppst->param_u.fa.cgap > ppst->param_u.fa.bestmax/4) ppst->param_u.fa.cgap = ppst->param_u.fa.bestmax/4; f_str->ndo = 0; f_str->noff = n0-1; if (f_str->diag==NULL) f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG, sizeof (struct dstruct)); if (f_str->diag == NULL) { fprintf (stderr, " cannot allocate diagonal arrays: %ld\n", (long) MAXDIAG * (long) (sizeof (struct dstruct))); exit (1); }#ifdef TFAST if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2, sizeof(unsigned char))) == NULL) { fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2); exit (1); } f_str->aa1x++;#endif /* allocate space for the scoring arrays */ maxn0 = n0 + 4; maxn0 = max(3*n0/2,MIN_RES); if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) { fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0); exit(1); } f_str->res = res; f_str->max_res = maxn0; /* Tatusov Statistics Setup */ /* initialize priors array. */ if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) { fprintf(stderr, "Couldn't allocate priors array.\n"); exit(1); } calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts); f_str->dotat = 0; f_str->shuff_cnt = ppst->shuff_node; /* End of Tatusov Statistics Setup */ *f_arg = f_str;}/* pstring1 is a message to the manager, currently 512 *//* pstring2 is the same information, but in a markx==10 format */voidget_param (struct pstruct *ppstr, char **pstring1, char *pstring2){#ifndef TFAST char *pg_str="FASTF";#else char *pg_str="TFASTF";#endif sprintf (pstring1[0], "%s (%s)",pg_str,verstr); sprintf (pstring1[1], "%s matrix (%d:%d) join: %d", ppstr->pamfile, ppstr->pam_h,ppstr->pam_l,ppstr->param_u.fa.cgap); if (ppstr->param_u.fa.iniflag) strcat(pstring1[0]," init1"); if (pstring2 != NULL) { sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\; pg_join: %d\n", pg_str,verstr, ppstr->pamfile, ppstr->pam_h,ppstr->pam_l, ppstr->param_u.fa.cgap); }}voidclose_work (const unsigned char *aa0, const int n0, struct pstruct *ppst, struct f_struct **f_arg){ struct f_struct *f_str; struct savestr *vmptr; f_str = *f_arg; if (f_str != NULL) { for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) free(vmptr->used); free(f_str->res);#ifdef TFAST free(f_str->aa1x - 1); /* allocated, then aa1x++'ed */#endif free(f_str->diag); free(f_str->link); free(f_str->pamh2); free(f_str->pamh1); free(f_str->harr); free(f_str->aa0t); free(f_str->aa0); free(f_str->priors); free(f_str->vmax); free(f_str->vptr); free(f_str); *f_arg = NULL; }}int do_fastf (unsigned char *aa0, int n0, const unsigned char *aa1, int n1, struct pstruct *ppst, struct f_struct *f_str, struct rstruct *rst, int *hoff, int opt_prob){ int nd; /* diagonal array size */ int lhval; int kfact; register struct dstruct *dptr; register int tscor; register struct dstruct *diagp; struct dstruct *dpmax; register int lpos; int tpos, npos; struct savestr *vmptr; int scor, tmp; int im, ib, nsave; int cmps (); /* comparison routine for ksort */ int *hsq; hsq = ppst->hsq; if (n1 < 1) { rst->score[0] = rst->score[1] = rst->score[2] = 0; rst->escore = 1.0; rst->segnum = 0; rst->seglen = 0; return 1; } if (n0+n1+1 >= MAXDIAG) { fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1); rst->score[0] = rst->score[1] = rst->score[2] = -1; rst->escore = 2.0; rst->segnum = 0; rst->seglen = 0; return -1; } nd = n0 + n1; dpmax = &f_str->diag[nd]; for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;) { dptr->stop = -1; dptr->dmax = NULL; dptr++->score = 0; } /* initialize the saved segment structures */ for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) { vmptr->score = 0; memset(vmptr->used, 0, n0 * sizeof(int)); } f_str->lowmax = f_str->vmax; f_str->lowscor = 0; /* start hashing */ diagp = &f_str->diag[f_str->noff]; for (lhval = lpos = 0; lpos < n1; lpos++, diagp++) { if (hsq[aa1[lpos]]>=NMAP) { lpos++ ; diagp++; while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;} if (lpos >= n1) break; lhval = 0; } lhval = hsq[aa1[lpos]]; for (tpos = f_str->harr[lhval].pos, npos = f_str->harr[lhval].next; tpos >= 0; tpos = f_str->link[npos].pos, npos = f_str->link[npos].next) { /* tscor gets position of end of current lpos diag run */ if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) { tscor++; /* move forward one */ if ((tscor -= lpos) <= 0) { /* check for size of gap to this hit - */ /* includes implicit -1 mismatch penalty */ scor = dptr->score; /* current score of this run */ if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && f_str->lowscor < scor) /* if updating tscor makes run worse, */ savemax (dptr, f_str); /* save it */ if ((tscor += scor) >= kfact) { /* add to current run if continuing */ /* is better than restart (kfact) */ dptr->score = tscor; dptr->stop = lpos; } else { dptr->score = kfact; /* starting over is better */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -