📄 dropgsw2.c
字号:
data = ppst->pam2[ip][aa0[l]][e] - bias; } else { data = 0; } *ps++ = (unsigned short)data; } } } pc = f_str->byte_score; for(f = 0; f<n0 ; f+=16) { /* e=0 */ for(i=0 ; i<16 ; i++) { *pc++ = (unsigned char)0; } for(e = 1; e<=nsq; e++) { for(i=0 ; i<16 ; i++) { l = f + i; if (l<n0) { data = ppst->pam2[ip][aa0[l]][e] - bias; } else { data = 0; } if(data>255) { /* printf("Fatal error. Score out of range for 8-bit Altivec/VMX datatype.\n"); exit(1); */ overflow = 1; } *pc++ = (unsigned char)data; } } } } f_str->bias = (unsigned char) (-bias); f_str->alphabet_size = nsq+1; /* Some variable to keep track of how many 8-bit runs we need to rerun * in 16-bit accuracy. If there are too many reruns it can be faster * to use 16-bit alignments directly. */ /* We can only do 8-bit alignments if the scores were small enough. */ if(overflow==0) f_str->try_8bit = 1; else f_str->try_8bit = 0; f_str->done_8bit = 0; f_str->done_16bit = 0; #endif /* SW_ALTIVEC */#if defined(SW_SSE2) /* First we allocate memory for the workspace - i.e. two rows for H and * one row for F. We also need enough space to hold a temporary * scoring profile which will be query_length * 16 (sse2 word length). * Since this might be run on Linux or AIX too, we don't assume * anything about the memory allocation but align it ourselves. */ f_str->workspace_memory = (void *)malloc(3*16*(MAXTST+MAXLIB+32)+256); f_str->workspace = (void *) ((((size_t) f_str->workspace_memory) + 255) & (~0xff)); /* We always use a scoring profile for the SSE2 implementation, but the layout * is a bit strange. The scoring profile is parallel to the query, but is * accessed in a stripped pattern. The query is divided into equal length * segments. The number of segments is equal to the number of elements * processed in the SSE2 register. For 8-bit calculations, the query will * be divided into 16 equal length parts. If the query is not long enough * to fill the last segment, it will be filled with neutral weights. The * first element in the SSE register will hold a value from the first segment, * the second element of the SSE register will hold a value from the * second segment and so on. So if the query length is 288, then each * segment will have a length of 18. So the first 16 bytes will have * the following weights: Q1, Q19, Q37, ... Q271; the next 16 bytes will * have the following weights: Q2, Q20, Q38, ... Q272; and so on until * all parts of all segments have been written. The last seqment will * have the following weights: Q18, Q36, Q54, ... Q288. This will be * done for the entire alphabet. */ f_str->word_score_memory = (void *)malloc((n0 + 32) * sizeof (short) * (nsq + 1) + 256); f_str->byte_score_memory = (void *)malloc((n0 + 32) * sizeof (char) * (nsq + 1) + 256); f_str->word_score = (unsigned short *) ((((size_t) f_str->word_score_memory) + 255) & (~0xff)); f_str->byte_score = (unsigned char *) ((((size_t) f_str->byte_score_memory) + 255) & (~0xff)); overflow = 0; if (ppst->pam_pssm) { /* Use a position-specific scoring profile. * This is essentially what we are going to construct anyway, but we'll * reorder it to suit sse2. */ bias = 127; for (i = 1; i <= nsq ; i++) { for (j = 0; j < n0 ; j++) { data = ppst->pam2p[ip][j][i]; if (data < bias) { bias = data; } } } /* Fill our specially organized byte- and word-size scoring arrays. */ ps = f_str->word_score; col_len = (n0 + 7) / 8; n_count = (n0 + 7) & 0xfffffff8; for (f = 0; f < n_count; ++f) { *ps++ = 0; } for (f = 1; f <= nsq ; f++) { for (e = 0; e < col_len; e++) { for (i = e; i < n_count; i += col_len) { if ( i < n0) { data = ppst->pam2p[ip][i][f];} else {data = 0;} *ps++ = (unsigned short)data; } } } pc = f_str->byte_score; col_len = (n0 + 15) / 16; n_count = (n0 + 15) & 0xfffffff0; for (f = 0; f < n_count; ++f) { *pc++ = 0; } for (f = 1; f <= nsq ; f++) { for (e = 0; e < col_len; e++) { for (i = e; i < n_count; i += col_len) { if ( i < n0 ) { data = ppst->pam2p[ip][i][f] - bias;} else {data = 0 - bias;} if (data > 255) { printf("Fatal error. data: %d bias: %d, position: %d/%d, " "Score out of range for 8-bit SSE2 datatype.\n", data, bias, f, e); exit(1); } *pc++ = (unsigned char)data; } } } } else { /* Classical simple substitution matrix */ /* Find the bias to use in the substitution matrix */ bias = 127; for (i = 1; i <= nsq ; i++) { for (j = 1; j <= nsq ; j++) { data = ppst->pam2[ip][i][j]; if (data < bias) { bias = data; } } } /* Fill our specially organized byte- and word-size scoring arrays. */ ps = f_str->word_score; col_len = (n0 + 7) / 8; n_count = (n0 + 7) & 0xfffffff8; for (f = 0; f < n_count; ++f) { *ps++ = 0; } for (f = 1; f <= nsq ; f++) { for (e = 0; e < col_len; e++) { for (i = e; i < n_count; i += col_len) { if (i >= n0) { data = 0; } else { data = ppst->pam2[ip][aa0[i]][f]; } *ps++ = (unsigned short)data; } } } pc = f_str->byte_score; col_len = (n0 + 15) / 16; n_count = (n0 + 15) & 0xfffffff0; for (f = 0; f < n_count; ++f) { *pc++ = 0; } for (f = 1; f <= nsq ; f++) { for (e = 0; e < col_len; e++) { for (i = e; i < n_count; i += col_len) { if (i >= n0) { data = -bias; } else { data = ppst->pam2[ip][aa0[i]][f] - bias; } if (data > 255) { printf("Fatal error. data: %d bias: %d, position: %d/%d, " "Score out of range for 8-bit SSE2 datatype.\n", data, bias, f, e); exit(1); } *pc++ = (unsigned char)data; } } } } f_str->bias = (unsigned char) (-bias); f_str->alphabet_size = nsq+1; /* Some variable to keep track of how many 8-bit runs we need to rerun * in 16-bit accuracy. If there are too many reruns it can be faster * to use 16-bit alignments directly. */ /* We can only do 8-bit alignments if the scores were small enough. */ f_str->try_8bit = (overflow == 0) ? 1 : 0; f_str->done_8bit = 0; f_str->done_16bit = 0;#endif /* SW_SSE2 */ /* minimum allocation for alignment */ f_str->max_res = max(3*n0/2,MIN_RES); *f_arg = f_str;}void close_work (const unsigned char *aa0, int n0, struct pstruct *ppst, struct f_struct **f_arg){ struct f_struct *f_str; f_str = *f_arg; if (f_str != NULL) { if (f_str->kar_p !=NULL) free(f_str->kar_p); f_str->ss--; free(f_str->ss); free(f_str->waa_a); free(f_str->pam2p[0][0]); free(f_str->pam2p[0]); free(f_str->waa_s); free(f_str->pam2p[1][0]); free(f_str->pam2p[1]);#if defined(SW_ALTIVEC) || defined(SW_SSE2) free(f_str->workspace_memory); free(f_str->word_score_memory); free(f_str->byte_score_memory);#endif free(f_str); *f_arg = NULL; }}/* pstring1 is a message to the manager, currently 512 *//*void get_param(struct pstruct *ppst,char *pstring1)*/void get_param (struct pstruct *ppst, char **pstring1, char *pstring2){ char pg_str[120]; char psi_str[120];#if defined(SW_ALTIVEC) strncpy(pg_str,"Smith-Waterman (Altivec/VMX, Erik Lindahl 2004)",sizeof(pg_str));#endif#if defined(SW_SSE2) strncpy(pg_str,"Smith-Waterman (SSE2, Michael Farrar 2006)",sizeof(pg_str));#endif#if !defined(SW_ALTIVEC) && !defined(SW_SSE2) strncpy(pg_str,"Smith-Waterman (PGopt)",sizeof(pg_str));#endif if (ppst->pam_pssm) { strncpy(psi_str,"-PSI",sizeof(psi_str));} else { psi_str[0]='\0';} sprintf (pstring1[0], "%s (%s)", pg_str, verstr); sprintf (pstring1[1], #ifdef OLD_FASTA_GAP "%s matrix%s (%d:%d)%s, gap-penalty: %d/%d",#else "%s matrix%s (%d:%d)%s, open/ext: %d/%d",#endif ppst->pamfile, psi_str, ppst->pam_h,ppst->pam_l, (ppst->ext_sq_set)?"xS":"\0", ppst->gdelval, ppst->ggapval); if (pstring2 != NULL) {#ifdef OLD_FASTA_GAP sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s%s (%d:%d)%s\n; pg_gap-pen: %d %d\n",#else sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s%s (%d:%d)%s\n; pg_open-ext: %d %d\n",#endif pg_str,verstr,ppst->pamfile,psi_str,ppst->pam_h,ppst->pam_l, (ppst->ext_sq_set)?"xS":"\0",ppst->gdelval,ppst->ggapval); }}void do_work (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int frame, struct pstruct *ppst, struct f_struct *f_str, int qr_flg, struct rstruct *rst){ int score; double lambda, H; int i; #ifdef LALIGN if (same_seq(aa0,n0,aa1,n1)) { rst->score[0] = prof_score(aa1, n0, f_str->waa_s); return; }#endif#ifdef SW_ALTIVEC if(f_str->try_8bit) { score = smith_waterman_altivec_byte(aa0, f_str->byte_score, n0, aa1, n1, f_str->bias,#ifndef OLD_FASTA_GAP -(ppst->gdelval + ppst->ggapval),#else -ppst->gdelval,#endif -ppst->ggapval, f_str); f_str->done_8bit++; if(score>=255) { /* Overflow, so we have to redo it in 16 bits. */ score = smith_waterman_altivec_word(aa0, f_str->word_score, n0, aa1, n1, f_str->bias,#ifndef OLD_FASTA_GAP -(ppst->gdelval + ppst->ggapval),#else -ppst->gdelval,#endif -ppst->ggapval, f_str); /* The 8 bit version is roughly 50% faster than the 16 bit version, * so we are fine if less than about 1/3 of the runs have to * be rerun with 16 bits. If it is more, and we have tried at least * 500 sequences, we switch off the 8-bit mode. */ f_str->done_16bit++; if(f_str->done_8bit>500 && (3*f_str->done_16bit)>(f_str->done_8bit)) f_str->try_8bit = 0; } } else { /* Just use the 16-bit altivec version directly */ score = smith_waterman_altivec_word(aa0,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -