📄 smith_waterman_altivec.c
字号:
/* Implementation of the Wozniak "anti-diagonal" vectorization strategy for Smith-Waterman comparison, Wozniak (1997) Comp. Appl. Biosci. 13:145-150 November, 2004*//* Written by Erik Lindahl, Stockholm Bioinformatics Center, 2004. Please send bug reports and/or suggestions to lindahl@sbc.su.se.*/#include <stdio.h>#include "defs.h"#include "param.h"#include "dropgsw2.h"#ifdef SW_ALTIVECintsmith_waterman_altivec_word(unsigned char * query_sequence, unsigned short * query_profile_word, int query_length, unsigned char * db_sequence, int db_length, unsigned short bias, unsigned short gap_open, unsigned short gap_extend, struct f_struct * f_str){ int i,j,k; unsigned short * p; unsigned short score; unsigned char * p_dbseq; int alphabet_size = f_str->alphabet_size; unsigned short * workspace = (unsigned short *)f_str->workspace; vector unsigned short Fup,Hup1,Hup2,E,F,H,tmp; vector unsigned char perm; vector unsigned short v_maxscore; vector unsigned short v_bias,v_gapopen,v_gapextend; vector unsigned short v_score; vector unsigned short v_score_q1; vector unsigned short v_score_q2; vector unsigned short v_score_q3; vector unsigned short v_score_load; vector unsigned char queue1_to_score = (vector unsigned char)(16,17,2,3,4,5,6,7,8,9,10,11,12,13,14,15); vector unsigned char queue2_to_queue1 = (vector unsigned char)(0,1,18,19,4,5,6,7,8,9,10,11,12,13,14,15); vector unsigned char queue3_to_queue2 = (vector unsigned char)(16,16,16,16,16,21,16,0,16,1,16,2,16,3,16,4); vector unsigned char queue3_with_load = (vector unsigned char)(23,5,6,7,8,25,9,10,11,27,12,13,29,14,31,16); /* Load the bias to all elements of a constant */ v_bias = vec_lde(0,&bias); perm = vec_lvsl(0,&bias); v_bias = vec_perm(v_bias,v_bias,perm); v_bias = vec_splat(v_bias,0); /* Load gap opening penalty to all elements of a constant */ v_gapopen = vec_lde(0,&gap_open); perm = vec_lvsl(0,&gap_open); v_gapopen = vec_perm(v_gapopen,v_gapopen,perm); v_gapopen = vec_splat(v_gapopen,0); /* Load gap extension penalty to all elements of a constant */ v_gapextend = vec_lde(0,&gap_extend); perm = vec_lvsl(0,&gap_extend); v_gapextend = vec_perm(v_gapextend,v_gapextend,perm); v_gapextend = vec_splat(v_gapextend,0); v_maxscore = vec_xor(v_maxscore,v_maxscore); // Zero out the storage vector k = 2*(db_length+7); for(i=0,j=0;i<k;i++,j+=16) { // borrow the zero value in v_maxscore to have something to store vec_st(v_maxscore,j,workspace); } for(i=0;i<query_length;i+=8) { // fetch first data asap. p_dbseq = db_sequence; k = *p_dbseq++; v_score_load = vec_ld(16*k,query_profile_word); // zero lots of stuff. // We use both the VPERM and VSIU unit to knock off some cycles. E = vec_splat_u16(0); F = vec_xor(F,F); H = vec_splat_u16(0); Hup2 = vec_xor(Hup2,Hup2); v_score_q1 = vec_splat_u16(0); v_score_q2 = vec_xor(v_score_q2,v_score_q2); v_score_q3 = vec_splat_u16(0); // reset pointers to the start of the saved data from the last row p = workspace; // PROLOGUE 1 // prefetch next residue k = *p_dbseq++; // Create the actual diagonal score vector // and update the queue of incomplete score vectors v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score); v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1); v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2); v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load); // prefetch score for next step v_score_load = vec_ld(16*k,query_profile_word); // load values of F and H from previous row (one unit up) Fup = vec_ld(0, p); Hup1 = vec_ld(16, p); p += 16; // move ahead 32 bytes // shift into place so we have complete F and H vectors // that refer to the values one unit up from each cell // that we are currently working on. Fup = vec_sld(Fup,F,14); Hup1 = vec_sld(Hup1,H,14); // do the dynamic programming // update E value E = vec_subs(E,v_gapextend); tmp = vec_subs(H,v_gapopen); E = vec_max(E,tmp); // update F value F = vec_subs(Fup,v_gapextend); tmp = vec_subs(Hup1,v_gapopen); F = vec_max(F,tmp); // add score to H H = vec_adds(Hup2,v_score); H = vec_subs(H,v_bias); // set H to max of H,E,F H = vec_max(H,E); H = vec_max(H,F); // Save value to use for next diagonal H Hup2 = Hup1; // Update highest score encountered this far v_maxscore = vec_max(v_maxscore,H); // PROLOGUE 2 // prefetch next residue k = *p_dbseq++; // Create the actual diagonal score vector // and update the queue of incomplete score vectors v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score); v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1); v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2); v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load); // prefetch score for next step v_score_load = vec_ld(16*k,query_profile_word); // load values of F and H from previous row (one unit up) Fup = vec_ld(0, p); Hup1 = vec_ld(16, p); p += 16; // move ahead 32 bytes // shift into place so we have complete F and H vectors // that refer to the values one unit up from each cell // that we are currently working on. Fup = vec_sld(Fup,F,14); Hup1 = vec_sld(Hup1,H,14); // do the dynamic programming // update E value E = vec_subs(E,v_gapextend); tmp = vec_subs(H,v_gapopen); E = vec_max(E,tmp); // update F value F = vec_subs(Fup,v_gapextend); tmp = vec_subs(Hup1,v_gapopen); F = vec_max(F,tmp); // add score to H H = vec_adds(Hup2,v_score); H = vec_subs(H,v_bias); // set H to max of H,E,F H = vec_max(H,E); H = vec_max(H,F); // Save value to use for next diagonal H Hup2 = Hup1; // Update highest score encountered this far v_maxscore = vec_max(v_maxscore,H); // PROLOGUE 3 // prefetch next residue k = *p_dbseq++; // Create the actual diagonal score vector // and update the queue of incomplete score vectors v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score); v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1); v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2); v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load); // prefetch score for next step v_score_load = vec_ld(16*k,query_profile_word); // load values of F and H from previous row (one unit up) Fup = vec_ld(0, p); Hup1 = vec_ld(16, p); p += 16; // move ahead 32 bytes // shift into place so we have complete F and H vectors // that refer to the values one unit up from each cell // that we are currently working on. Fup = vec_sld(Fup,F,14); Hup1 = vec_sld(Hup1,H,14); // do the dynamic programming // update E value E = vec_subs(E,v_gapextend); tmp = vec_subs(H,v_gapopen); E = vec_max(E,tmp); // update F value F = vec_subs(Fup,v_gapextend); tmp = vec_subs(Hup1,v_gapopen); F = vec_max(F,tmp); // add score to H H = vec_adds(Hup2,v_score); H = vec_subs(H,v_bias); // set H to max of H,E,F H = vec_max(H,E); H = vec_max(H,F); // Save value to use for next diagonal H Hup2 = Hup1; // Update highest score encountered this far v_maxscore = vec_max(v_maxscore,H); // PROLOGUE 4 // prefetch next residue k = *p_dbseq++; // Create the actual diagonal score vector // and update the queue of incomplete score vectors v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score); v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1); v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2); v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load); // prefetch score for next step v_score_load = vec_ld(16*k,query_profile_word); // load values of F and H from previous row (one unit up) Fup = vec_ld(0, p); Hup1 = vec_ld(16, p); p += 16; // move ahead 32 bytes // shift into place so we have complete F and H vectors // that refer to the values one unit up from each cell // that we are currently working on. Fup = vec_sld(Fup,F,14); Hup1 = vec_sld(Hup1,H,14); // do the dynamic programming // update E value E = vec_subs(E,v_gapextend); tmp = vec_subs(H,v_gapopen); E = vec_max(E,tmp); // update F value F = vec_subs(Fup,v_gapextend); tmp = vec_subs(Hup1,v_gapopen); F = vec_max(F,tmp); // add score to H H = vec_adds(Hup2,v_score); H = vec_subs(H,v_bias); // set H to max of H,E,F H = vec_max(H,E); H = vec_max(H,F); // Save value to use for next diagonal H Hup2 = Hup1; // Update highest score encountered this far v_maxscore = vec_max(v_maxscore,H); // PROLOGUE 5 // prefetch next residue k = *p_dbseq++; // Create the actual diagonal score vector // and update the queue of incomplete score vectors v_score = vec_perm(v_score_q1, v_score_load, queue1_to_score); v_score_q1 = vec_perm(v_score_q2, v_score_load, queue2_to_queue1); v_score_q2 = vec_perm(v_score_q3, v_score_load, queue3_to_queue2); v_score_q3 = vec_perm(v_score_q3, v_score_load, queue3_with_load); // prefetch score for next step v_score_load = vec_ld(16*k,query_profile_word); // load values of F and H from previous row (one unit up) Fup = vec_ld(0, p); Hup1 = vec_ld(16, p); p += 16; // move ahead 32 bytes // shift into place so we have complete F and H vectors // that refer to the values one unit up from each cell // that we are currently working on. Fup = vec_sld(Fup,F,14); Hup1 = vec_sld(Hup1,H,14); // do the dynamic programming // update E value E = vec_subs(E,v_gapextend); tmp = vec_subs(H,v_gapopen); E = vec_max(E,tmp); // update F value F = vec_subs(Fup,v_gapextend); tmp = vec_subs(Hup1,v_gapopen); F = vec_max(F,tmp); // add score to H H = vec_adds(Hup2,v_score); H = vec_subs(H,v_bias); // set H to max of H,E,F H = vec_max(H,E); H = vec_max(H,F); // Save value to use for next diagonal H
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -