📄 dropnfa.c
字号:
/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia *//* $Name: fa35_03_06 $ - $Id: dropnfa.c,v 1.91 2008/02/19 08:50:13 wrp Exp $ *//* 18-Sep-2006 - removed global variables for alignment from nw_align and bg_align *//* 18-Oct-2005 - converted to use a_res and aln for alignment coordinates *//* 14-May-2003 - modified to return alignment start at 0, rather than 1, for begin:end alignments*//* implements the fasta algorithm, see: W. R. Pearson, D. J. Lipman (1988) "Improved tools for biological sequence comparison" Proc. Natl. Acad. Sci. USA 85:2444-2448 This version uses Smith-Waterman for final protein alignments W. R. Pearson (1996) "Effective protein sequence comparison" Methods Enzymol. 266:227-258 26-April-2001 - -DGAP_OPEN redefines -f, as gap open penalty 4-Nov-2001 - modify spam() while(1).*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <math.h>#include "defs.h"#include "param.h"/* this must be consistent with upam.h */#define MAXHASH 32#define NMAP MAXHASH+1/* globals for fasta */#define MAXWINDOW 64#ifndef MAXSAV#define MAXSAV 10#endif#ifndef ALLOCN0static char *verstr="3.5 Sept 2006";#elsestatic char *verstr="3.5an0 Sept 2006";#endifextern void w_abort(char *, char *);int shscore(const unsigned char *aa0, int n0, int **pam2);extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst, double *aa0_f, double **kp);extern void init_karlin_a(struct pstruct *, double *, double **);extern int do_karlin(const unsigned char *, int n1, int **, struct pstruct *, double *, double *, double *, double *);extern void aancpy(char *to, char *from, int count, struct pstruct *ppst);char *ckalloc(size_t);#ifdef TFASTAextern int aatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);#endif#include "dropnfa.h"#define DROP_INTERN#include "drop_func.h"struct swstr { int H, E;};intdmatch (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int hoff, int window, int **pam2, int gdelval, int ggapval, struct f_struct *f_str);extern int sw_walign (int **pam2p, int n0, const unsigned char *aa1, int n1, int q, int r, struct swstr *ss, struct a_res_str *a_res );/* initialize for fasta */voidinit_work (unsigned char *aa0, int n0, struct pstruct *ppst, struct f_struct **f_arg){ int mhv, phv; int hmax; int i0, hv; int pamfact; int btemp; struct f_struct *f_str; /* these used to be globals, but do not need to be */ int ktup; /* word size examined */ int fact; /* factor used to scale ktup match value */ int kt1; /* ktup-1 */ int lkt; /* last ktup - initiall kt1, but can be increased for hsq >= NMAP */ int maxn0; /* used in band alignment */ int *pwaa_a, *pwaa_s; /* pam[aa0[]] profile */ int i, j, e, f; struct swstr *ss; int **pam2p; int *waa; int nsq, ip, *hsq; if (ppst->ext_sq_set) { nsq = ppst->nsqx; ip = 1; hsq = ppst->hsqx; } else { nsq = ppst->nsq; ip = 0; hsq = ppst->hsq; } f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));#ifndef TFASTA if((ppst->zsflag%10) == 6) { f_str->kar_p = NULL; init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p); }#endif btemp = 2 * ppst->param_u.fa.bestoff / 3 + n0 / ppst->param_u.fa.bestscale + ppst->param_u.fa.bkfact * (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup); if (ppst->nt_align) btemp = (btemp*ppst->pam_h)/5; /* normalize to standard +5/-4 */ btemp = min (btemp, ppst->param_u.fa.bestmax); if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5; ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3; if (ppst->param_u.fa.optcut_set != 1)#ifndef TFASTA ppst->param_u.fa.optcut = btemp;#else ppst->param_u.fa.optcut = (btemp*3)/2;#endif#ifndef OLD_FASTA_GAP ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;#else ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;#endif pamfact = ppst->param_u.fa.pamfact; ktup = ppst->param_u.fa.ktup; fact = ppst->param_u.fa.scfact * ktup; if (pamfact == -1) pamfact = 0; else if (pamfact == -2) pamfact = 1; for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++) if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0]; if (mhv <= 0) { fprintf (stderr, " maximum hsq <=0 %d\n", mhv); exit (1); } for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++; /* kshft = 2; */ kt1 = ktup - 1; hv = 1; for (i0 = 0; i0 < ktup; i0++) hv = hv << f_str->kshft; hmax = hv; f_str->hmask = (hmax >> f_str->kshft) - 1; if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) { fprintf (stderr, " *** cannot allocate hash array: hmax: %d hmask: %d\n", hmax, f_str->hmask); exit (1); } if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) { fprintf (stderr, " *** cannot allocate pamh1 array nsq=%d\n",nsq); exit (1); } if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) { fprintf (stderr, " *** cannot allocate pamh2 array hmax=%d\n",hmax); exit (1); } if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) { fprintf (stderr, " *** cannot allocate hash link array n0=%d",n0); exit (1); } for (i0 = 0; i0 < hmax; i0++) f_str->harr[i0] = -1; for (i0 = 0; i0 < n0; i0++) f_str->link[i0] = -1; /* encode the aa0 array */ phv = hv = 0; lkt = kt1; /* restart hv, phv calculation */ for (i0 = 0; i0 < min(lkt,n0); i0++) { if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ ktup; continue;} hv = (hv << f_str->kshft) + hsq[aa0[i0]]; phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup; } for (; i0 < n0; i0++) { if (hsq[aa0[i0]] >= NMAP) { hv=phv=0; /* restart hv, phv calculation */ for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) { if (hsq[aa0[i0]] >= NMAP) { hv=phv=0; lkt = i0+ktup; continue; } hv = (hv << f_str->kshft) + hsq[aa0[i0]]; phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup; } } if (i0 >= n0) break; hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]]; f_str->link[i0] = f_str->harr[hv]; f_str->harr[hv] = i0; if (pamfact) { f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup); /* this check should always be true, but just in case */ if (hsq[aa0[i0-kt1]]<NMAP) phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup; } else f_str->pamh2[hv] = fact * ktup; } /* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the pam2[0][0] is now undefined for consistency with blast */ if (pamfact) for (i0 = 1; i0 <= nsq; i0++) f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup; else for (i0 = 1; i0 <= nsq; i0++) f_str->pamh1[i0] = fact; f_str->ndo = 0; f_str->noff = n0-1;#ifndef ALLOCN0 if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG, sizeof (struct dstruct)))==NULL) { fprintf (stderr," *** cannot allocate diagonal arrays: %lu\n", MAXDIAG *sizeof (struct dstruct)); exit (1); };#else if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0, sizeof (struct dstruct)))==NULL) { fprintf (stderr," *** cannot allocate diagonal arrays: %ld\n", (long)n0*sizeof (struct dstruct)); exit (1); };#endif#ifdef TFASTA if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2, sizeof(unsigned char))) == NULL) { fprintf (stderr, " *** cannot allocate aa1x array %d\n", ppst->maxlen+2); exit (1); } f_str->aa1x++;#endif f_str->bss = (struct bdstr *) calloc((size_t)ppst->param_u.fa.optwid*2+4, sizeof(struct bdstr)); f_str->bss++; /* allocate space for the scoring arrays */ maxn0 = n0 + 4; if ((ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr))) == NULL) { fprintf (stderr, " *** cannot allocate ss array %3d\n", n0); exit (1); } ss++; ss[n0].H = -1; /* this is used as a sentinel - normally H >= 0 */ ss[n0].E = 1; f_str->ss = ss; /* initialize variable (-S) pam matrix */ if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr," *** cannot allocate waa_s array %3d\n",nsq*n0); exit(1); } /* initialize pam2p[1] pointers */ if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) { fprintf(stderr," *** cannot allocate pam2p[1] array %3d\n",n0); exit(1); } pam2p = f_str->pam2p[1]; if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr," *** cannot allocate pam2p[1][] array %3d\n",nsq*n0); exit(1); } for (i=1; i<n0; i++) { pam2p[i]= pam2p[0] + (i*(nsq+1)); } /* initialize universal (alignment) matrix */ if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr," *** cannot allocate waa_a struct %3d\n",nsq*n0); exit(1); } /* initialize pam2p[0] pointers */ if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) { fprintf(stderr," *** cannot allocate pam2p[1] array %3d\n",n0); exit(1); } pam2p = f_str->pam2p[0]; if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr," *** cannot allocate pam2p[1][] array %3d\n",nsq*n0); exit(1); } for (i=1; i<n0; i++) { pam2p[i]= pam2p[0] + (i*(nsq+1)); } /* pwaa effectively has a sequence profile -- pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM) pwaa[n0..2n0-1] has pam scores for residue 1 (A) pwaa[2n0..3n-1] has pam scores for residue 2 (R), ... thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that *pwaa++ rapidly moves though the scores of the aa1p[] position without further indexing For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have a different score in each position. */ if (ppst->pam_pssm) { pwaa_s = f_str->waa_s; pwaa_a = f_str->waa_a; for (e = 0; e <=nsq; e++) { /* for each residue in the alphabet */ for (f = 0; f < n0; f++) { /* for each position in aa0 */ *pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e]; *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2p[0][f][e]; } } } else { /* initialize scanning matrix */ pwaa_s = f_str->waa_s; pwaa_a = f_str->waa_a; for (e = 0; e <=nsq; e++) /* for each residue in the alphabet */ for (f = 0; f < n0; f++) { /* for each position in aa0 */ *pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][aa0[f]][e]; *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][aa0[f]][e]; } } f_str->max_res = max(3*n0/2,MIN_RES); *f_arg = f_str;}/* pstring1 is a message to the manager, currently 512 *//* pstring2 is the same information, but in a markx==10 format */voidget_param (struct pstruct *ppstr, char **pstring1, char *pstring2){#ifndef TFASTA char *pg_str="FASTA";#else char *pg_str="TFASTA";#endif if (!ppstr->param_u.fa.optflag) { sprintf (pstring1[0], "%s (%s)", pg_str, verstr); if (ppstr->param_u.fa.iniflag) strcat(pstring1[0]," init1"); } else { sprintf (pstring1[0], "%s (%s) [optimized]", pg_str, verstr); } sprintf (pstring1[1], #ifdef OLD_FASTA_GAP "%s matrix (%d:%d)%s ktup: %d\n join: %d, opt: %d, gap-pen: %d/%d, width: %3d",#else "%s matrix (%d:%d)%s ktup: %d\n join: %d, opt: %d, open/ext: %d/%d, width: %3d",#endif ppstr->pamfile, ppstr->pam_h,ppstr->pam_l, (ppstr->ext_sq_set) ? "xS":"\0", ppstr->param_u.fa.ktup, ppstr->param_u.fa.cgap, ppstr->param_u.fa.optcut, ppstr->gdelval, ppstr->ggapval, ppstr->param_u.fa.optwid); if (pstring2 != NULL) { sprintf (pstring2, #ifdef OLD_FASTA_GAP "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",#else "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\; pg_open-ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",#endif pg_str,verstr,ppstr->pamfile, ppstr->pam_h,ppstr->pam_l, ppstr->gdelval, ppstr->ggapval,ppstr->param_u.fa.ktup,ppstr->param_u.fa.optcut, ppstr->param_u.fa.cgap); }}voidclose_work (const unsigned char *aa0, int n0, struct pstruct *ppst, struct f_struct **f_arg){ struct f_struct *f_str; f_str = *f_arg; if (f_str != NULL) { if (f_str->kar_p!=NULL) free(f_str->kar_p); f_str->ss--; f_str->bss--; /* free(f_str->res); */ free(f_str->waa_a); free(f_str->waa_s); free(f_str->pam2p[1][0]); free(f_str->pam2p[1]); free(f_str->pam2p[0][0]); free(f_str->pam2p[0]); free(f_str->ss); free(f_str->bss); free(f_str->diag); free(f_str->link); free(f_str->pamh2); free(f_str->pamh1); free(f_str->harr); free(f_str); *f_arg = NULL; }}#ifdef ALLOCN0void savemax (struct dstruct *, int, struct f_struct *);#elsevoid savemax (struct dstruct *, struct f_struct *);#endifint spam (const unsigned char *, const unsigned char *, struct savestr *, int **, int, int, int);int sconn(struct savestr **, int nsave, int cgap, int pgap, int noff);void kpsort(struct savestr **, int);extern intNW_ALIGN(const unsigned char *, const unsigned char *, int, int,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -