📄 dropgsw2.c
字号:
/* copyright (c) 1996 William R. Pearson *//* $Name: fa35_03_06 $ - $Id: dropgsw2.c,v 1.9 2008/02/19 08:50:13 wrp Exp $ *//* 17-Aug-2006 - removed globals *sapp/last - alignment should be thread safe *//* 12-Oct-2005 - converted to use a_res and aln for alignment coordinates *//* 4-Nov-2004 - Diagonal Altivec Smith-Waterman included *//* 14-May-2003 - modified to return alignment start at 0, rather than 1, for begin:end alignments 25-Feb-2003 - modified to support Altivec parallel Smith-Waterman 22-Sep-2003 - removed Altivec support at request of Sencel lawyers*//* the do_walign() code in this file is not thread_safe *//* init_work(), do_work(), are thread safe *//* this code uses an implementation of the Smith-Waterman algorithm designed by Phil Green, U. of Washington, that is 1.5 - 2X faster than my Miller and Myers implementation. *//* the shortcuts used in this program prevent it from calculating scores that are less than the gap penalty for the first residue in a gap. As a result this code cannot be used with very large gap penalties, or with very short sequences, and probably should not be used with prss3.*//* version 3.2 fixes a subtle bug that was encountered while running do_walign() interspersed with do_work(). This happens only with -m 9 and pvcomplib. The fix was to more explicitly zero-out ss[] at the beginning of do_work.*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <math.h>#include "defs.h"#include "param.h"static char *verstr="6.0 Mar 2007";#include "dropgsw2.h"#define DROP_INTERN#include "drop_func.h"#ifdef SW_ALTIVEC#include "smith_waterman_altivec.h"#endif#ifdef SW_SSE2#include "smith_waterman_sse2.h"#endifstruct swstr {int H, E;};extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst, double *aa0_f, double **kp);extern int do_karlin(const unsigned char *aa1, int n1, int **pam2, struct pstruct *ppst, double *aa0_f, double *kar_p, double *lambda, double *H);extern int sw_walign (int **pam2p, int n0, const unsigned char *aa1, int n1, int q, int r, struct swstr *ss, struct a_res_str *a_res );voidSIM(const unsigned char *A, /* seq1 indexed A[1..M] */ const unsigned char *B, /* seq2 indexed B[1..N] */ int M, int N, /* len seq1, seq2 */ struct pstruct *ppst, /* parameters */ int nseq, /* nseq - number of different sequences */ int mini_score, /* cut-off score */ int max_count, /* number of alignments */ struct a_res_str *a_res); /* alignment result structure */static intFLOCAL_ALIGN(const unsigned char *aa0, const unsigned char *aa1, int n0, int n1, int low, int up, int **W, int GG,int HH, int MW, struct f_struct *f_str);extern void aancpy(char *to, char *from, int count, struct pstruct *ppst);int same_seq(const unsigned char *aa0, int n0, const unsigned char *aa1, int n1);static intprof_score(const unsigned char *aa1, int n0, int *pwaa_s);/* initialize for Smith-Waterman optimal score */voidinit_work (unsigned char *aa0, int n0, struct pstruct *ppst, struct f_struct **f_arg){ int ip; int *pwaa_s, *pwaa_a; int e, f, i, j, l; int *res; struct f_struct *f_str; int **pam2p; struct swstr *ss; int nsq;#if defined(SW_ALTIVEC) || defined(SW_SSE2) int data,bias; unsigned char * pc; unsigned short * ps; int overflow; int n_count; int col_len;#endif if (ppst->ext_sq_set) { nsq = ppst->nsqx; ip = 1; } else { nsq = ppst->nsq; ip = 0; } /* allocate space for function globals */ f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct)); if(ppst->zsflag == 6 || ppst->zsflag == 16) { f_str->kar_p = NULL; init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p); } /* allocate space for the scoring arrays */ if ((ss = (struct swstr *) calloc (n0+2, sizeof (struct swstr))) == NULL) { fprintf (stderr, "cannot allocate ss array %3d\n", n0); exit (1); } ss++; ss[n0].H = -1; /* this is used as a sentinel - normally H >= 0 */ ss[n0].E = 1; f_str->ss = ss; /* initialize variable (-S) pam matrix */ if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr,"cannot allocate waa_s array %3d\n",nsq*n0); exit(1); } /* initialize pam2p[1] pointers */ if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) { fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0); exit(1); } pam2p = f_str->pam2p[1]; if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0); exit(1); } for (i=1; i<n0; i++) { pam2p[i]= pam2p[0] + (i*(nsq+1)); } /* initialize universal (alignment) matrix */ if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr,"cannot allocate waa_a struct %3d\n",nsq*n0); exit(1); } /* initialize pam2p[0] pointers */ if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) { fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0); exit(1); } pam2p = f_str->pam2p[0]; if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0); exit(1); } for (i=1; i<n0; i++) { pam2p[i]= pam2p[0] + (i*(nsq+1)); } /* pwaa effectively has a sequence profile -- pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM) pwaa[n0..2n0-1] has pam scores for amino acid 1 (A) pwaa[2n0..3n0-1] has pam scores for amino acid 2 (R), ... thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that *pwaa++ rapidly moves though the scores of the aa1p[] position without further indexing For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have a different score in each position. */ if (ppst->pam_pssm) { pwaa_s = f_str->waa_s; pwaa_a = f_str->waa_a; for (e = 0; e <=nsq; e++) { /* for each residue in the alphabet */ for (f = 0; f < n0; f++) { /* for each position in aa0 */ *pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e]; *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2p[0][f][e]; } } } else { /* initialize scanning matrix */ pwaa_s = f_str->waa_s; pwaa_a = f_str->waa_a; for (e = 0; e <=nsq; e++) /* for each residue in the alphabet */ for (f = 0; f < n0; f++) { /* for each position in aa0 */ *pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][aa0[f]][e]; *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][aa0[f]][e]; } }#if defined(SW_ALTIVEC) /* First we allocate memory for the workspace - i.e. the single row * of storage for H/F. Since this might be run on Linux or AIX too, * we don't assume anything about the memory allocation but align * it ourselves. We need two vectors (16 bytes each) per element, * and some padding space to make it cache-line aligned. * MAXTST+MAXLIB is longest allowed database sequence length... * this should be m_msg.max_tot, but m_msg is not available, but * ppst->maxlen has maxn, which is appropriate. */ f_str->workspace_memory = (void *)malloc(2*16*(ppst->maxlen+SEQ_PAD)+256); f_str->workspace = (void *) ((((size_t) f_str->workspace_memory) + 255) & (~0xff)); /* We always use a scoring profile in altivec, but the layout is a bit strange * in order to optimize memory access order and thus cache efficiency. * Normally we first try 8-bit scoring in altivec, and if this leads to overflow * we recompute the score with 16-bit accuracy. Because of this we need to construct * two score profiles. * Since altivec always loads 16 bytes from aligned memory, corresponding to 8 or 16 * elements (for 16 and 8 bit scoring, respectively), we organize the scoring * profile like this for 8-bit accuracy: * * 1. The profile starts on 256-byte aligned memory (cache line on G5 is 128 bytes). * 2. First we have the score for the full alphabet for the first 16 residues of * the query, i.e. positions 0-15 are the scores for the first 16 query letters * vs. the first in the alphabet, positions 16-31 the scores for the same 16 * query positions against alphabet letter two, etc. * 3. After alphabet_size*16bytes we start with the scores for residues 16-31 in * the query, organized in the same way. * 4. At the end of the query sequence, we pad the scoring to the next 16-tuple * with neutral scores. * 5. The total size of the profile is thus alphabet_size*N, where N is the * size of the query rounded up to the next 16-tuple. * * The word (16-bit) profile is identical, but scores are stored as 8-tuples. */ f_str->word_score_memory = (void *)malloc(10*2*(nsq+2)*(n0+1+16)+256); f_str->byte_score_memory = (void *)malloc(10*(nsq+2)*(n0+1+16)+256); f_str->word_score = (unsigned short *) ((((size_t) f_str->word_score_memory) + 255) & (~0xff)); f_str->byte_score = (unsigned char *) ((((size_t) f_str->byte_score_memory) + 255) & (~0xff)); overflow = 0; if (ppst->pam_pssm) { /* Use a position-specific scoring profile. * This is essentially what we are going to construct anyway, but we'll * reorder it to suit altivec. */ bias = 127; for(i = 1; i <= nsq ; i++) { for(j = 0; j < n0 ; j++) { data = ppst->pam2p[ip][j][i]; if(data<bias) bias = data; } } /* Fill our specially organized byte- and word-size scoring arrays. */ ps = f_str->word_score; for(f = 0; f<n0 ; f+=8) { /* e=0 */ for(i=0 ; i<8 ; i++) { *ps++ = (unsigned short) 0; } /* for each chunk of 8 residues in our query */ for(e = 1; e<=nsq; e++) { for(i=0 ; i<8 ; i++) { l = f + i; if(l<n0) { data = ppst->pam2p[ip][l][e] - bias; } else { data = 0; } *ps++ = (unsigned short)data; } } } pc = f_str->byte_score; for(f = 0; f<n0 ; f+=16) { /* e=0 */ for(i=0 ; i<16 ; i++) { *pc++ = (unsigned char)0; } for(e = 1; e<=nsq; e++) { for(i=0 ; i<16 ; i++) { l = f + i; if(l<n0) { data = ppst->pam2p[ip][l][e] - bias; } else { data = 0; } if(data>255) { /* printf("Fatal error. data: %d bias: %d, position: %d/%d, Score out of range for 8-bit Altivec/VMX datatype.\n",data,bias,l,e); exit(1); */ overflow = 1; } *pc++ = (unsigned char)data; } } } } else { /* Classical simple substitution matrix */ /* Find the bias to use in the substitution matrix */ bias = 127; for(i = 1; i <= nsq ; i++) { for(j = 1; j <= nsq ; j++) { data = ppst->pam2[ip][i][j]; if(data<bias) bias = data; } } /* Fill our specially organized byte- and word-size scoring arrays. */ ps = f_str->word_score; for(f = 0; f<n0 ; f+=8) { /* e=0 */ for(i=0 ; i<8 ; i++) { *ps++ = (unsigned short) 0; } /* for each chunk of 8 residues in our query */ for(e = 1; e<=nsq; e++) { for(i=0 ; i<8 ; i++) { l = f + i; if(l<n0) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -