📄 smith_waterman_altivec.c
字号:
vector unsigned char v_score_q3; vector unsigned char v_score_q4; vector unsigned char v_score_q5; vector unsigned char v_score_load1; vector unsigned char v_score_load2; vector unsigned char v_zero; vector unsigned char queue1_to_score = (vector unsigned char)(16,1,2,3,4,5,6,7,24,9,10,11,12,13,14,15); vector unsigned char queue2_to_queue1 = (vector unsigned char)(16,17,2,3,4,5,6,7,24,25,10,11,12,13,14,15); vector unsigned char queue3_to_queue2 = (vector unsigned char)(16,17,18,3,4,5,6,7,24,25,26,11,12,13,14,15); vector unsigned char queue4_to_queue3 = (vector unsigned char)(16,17,18,19,4,5,6,7,24,25,26,27,12,13,14,15); vector unsigned char queue5_to_queue4 = (vector unsigned char)(16,17,18,19,20,2,3,4,24,25,26,27,28,10,11,12); vector unsigned char queue5_with_load = (vector unsigned char)(19,20,21,5,6,22,7,23,27,28,29,13,14,30,15,31); vector unsigned char merge_score_load = (vector unsigned char)(0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); v_zero = vec_splat_u8(0); /* Load the bias to all elements of a constant */ v_bias = vec_lde(0,&bias); perm = vec_lvsl(0,&bias); v_bias = vec_perm(v_bias,v_bias,perm); v_bias = vec_splat(v_bias,0); /* Load gap opening penalty to all elements of a constant */ v_gapopen = vec_lde(0,&gap_open); perm = vec_lvsl(0,&gap_open); v_gapopen = vec_perm(v_gapopen,v_gapopen,perm); v_gapopen = vec_splat(v_gapopen,0); /* Load gap extension penalty to all elements of a constant */ v_gapextend = vec_lde(0,&gap_extend); perm = vec_lvsl(0,&gap_extend); v_gapextend = vec_perm(v_gapextend,v_gapextend,perm); v_gapextend = vec_splat(v_gapextend,0); v_maxscore = vec_xor(v_maxscore,v_maxscore); // Zero out the storage vector k = (db_length+15); for(i=0,j=0;i<k;i++,j+=32) { // borrow the zero value in v_maxscore to have something to store vec_st(v_maxscore,j,workspace); vec_st(v_maxscore,j+16,workspace); } for(i=0;i<query_length;i+=16) { // zero lots of stuff. // We use both the VPERM and VSIU unit to knock off some cycles. E = vec_splat_u8(0); F = vec_xor(F,F); H = vec_splat_u8(0); Hup2 = vec_xor(Hup2,Hup2); v_score_q1 = vec_splat_u8(0); v_score_q2 = vec_xor(v_score_q2,v_score_q2); v_score_q3 = vec_splat_u8(0); v_score_q4 = vec_xor(v_score_q4,v_score_q4); v_score_q5 = vec_splat_u8(0); // reset pointers to the start of the saved data from the last row p = workspace; // start directly and prefetch score column k = db_sequence[0]; k8 = k; v_score_load1 = vec_ld(16*k,query_profile_byte); v_score_load2 = v_score_load1; v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load); // PROLOGUE 1 // prefetch next residue k = db_sequence[1]; v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score); v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1); v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2); v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3); v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4); v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load); // prefetch score for next step v_score_load1 = vec_ld(16*k,query_profile_byte); // load values of F and H from previous row (one unit up) Fup = vec_ld(0, p); Hup1 = vec_ld(16, p); p += 32; // move ahead 32 bytes // shift into place so we have complete F and H vectors // that refer to the values one unit up from each cell // that we are currently working on. Fup = vec_sld(Fup,F,15); Hup1 = vec_sld(Hup1,H,15); // do the dynamic programming // update E value E = vec_subs(E,v_gapextend); tmp = vec_subs(H,v_gapopen); E = vec_max(E,tmp); // update F value F = vec_subs(Fup,v_gapextend); tmp = vec_subs(Hup1,v_gapopen); F = vec_max(F,tmp); v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load); // add score to H H = vec_adds(Hup2,v_score); H = vec_subs(H,v_bias); // set H to max of H,E,F H = vec_max(H,E); H = vec_max(H,F); // Update highest score encountered this far v_maxscore = vec_max(v_maxscore,H); // PROLOGUE 2 // prefetch next residue k = db_sequence[2]; v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score); v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1); v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2); v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3); v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4); v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load); // prefetch score for next step v_score_load1 = vec_ld(16*k,query_profile_byte); // load values of F and H from previous row (one unit up) Fup = vec_ld(0, p); Hup2 = vec_ld(16, p); p += 32; // move ahead 32 bytes // shift into place so we have complete F and H vectors // that refer to the values one unit up from each cell // that we are currently working on. Fup = vec_sld(Fup,F,15); Hup2 = vec_sld(Hup2,H,15); // do the dynamic programming // update E value E = vec_subs(E,v_gapextend); tmp = vec_subs(H,v_gapopen); E = vec_max(E,tmp); // update F value F = vec_subs(Fup,v_gapextend); tmp = vec_subs(Hup2,v_gapopen); F = vec_max(F,tmp); v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load); // add score to H H = vec_adds(Hup1,v_score); H = vec_subs(H,v_bias); // set H to max of H,E,F H = vec_max(H,E); H = vec_max(H,F); // Update highest score encountered this far v_maxscore = vec_max(v_maxscore,H); // PROLOGUE 3 // prefetch next residue k = db_sequence[3]; v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score); v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1); v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2); v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3); v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4); v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load); // prefetch score for next step v_score_load1 = vec_ld(16*k,query_profile_byte); // load values of F and H from previous row (one unit up) Fup = vec_ld(0, p); Hup1 = vec_ld(16, p); p += 32; // move ahead 32 bytes // shift into place so we have complete F and H vectors // that refer to the values one unit up from each cell // that we are currently working on. Fup = vec_sld(Fup,F,15); Hup1 = vec_sld(Hup1,H,15); // do the dynamic programming // update E value E = vec_subs(E,v_gapextend); tmp = vec_subs(H,v_gapopen); E = vec_max(E,tmp); // update F value F = vec_subs(Fup,v_gapextend); tmp = vec_subs(Hup1,v_gapopen); F = vec_max(F,tmp); v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load); // add score to H H = vec_adds(Hup2,v_score); H = vec_subs(H,v_bias); // set H to max of H,E,F H = vec_max(H,E); H = vec_max(H,F); // Update highest score encountered this far v_maxscore = vec_max(v_maxscore,H); // PROLOGUE 4 // prefetch next residue k = db_sequence[4]; v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score); v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1); v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2); v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3); v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4); v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load); // prefetch score for next step v_score_load1 = vec_ld(16*k,query_profile_byte); // load values of F and H from previous row (one unit up) Fup = vec_ld(0, p); Hup2 = vec_ld(16, p); p += 32; // move ahead 32 bytes // shift into place so we have complete F and H vectors // that refer to the values one unit up from each cell // that we are currently working on. Fup = vec_sld(Fup,F,15); Hup2 = vec_sld(Hup2,H,15); // do the dynamic programming // update E value E = vec_subs(E,v_gapextend); tmp = vec_subs(H,v_gapopen); E = vec_max(E,tmp); // update F value F = vec_subs(Fup,v_gapextend); tmp = vec_subs(Hup2,v_gapopen); F = vec_max(F,tmp); v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load); // add score to H H = vec_adds(Hup1,v_score); H = vec_subs(H,v_bias); // set H to max of H,E,F H = vec_max(H,E); H = vec_max(H,F); // Update highest score encountered this far v_maxscore = vec_max(v_maxscore,H); // PROLOGUE 5 // prefetch next residue k = db_sequence[5]; v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score); v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1); v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2); v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3); v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4); v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load); // prefetch score for next step v_score_load1 = vec_ld(16*k,query_profile_byte); // load values of F and H from previous row (one unit up) Fup = vec_ld(0, p); Hup1 = vec_ld(16, p); p += 32; // move ahead 32 bytes // shift into place so we have complete F and H vectors // that refer to the values one unit up from each cell // that we are currently working on. Fup = vec_sld(Fup,F,15); Hup1 = vec_sld(Hup1,H,15); // do the dynamic programming // update E value E = vec_subs(E,v_gapextend); tmp = vec_subs(H,v_gapopen); E = vec_max(E,tmp); // update F value F = vec_subs(Fup,v_gapextend); tmp = vec_subs(Hup1,v_gapopen); F = vec_max(F,tmp); v_score_load1 = vec_perm(v_score_load1,v_zero,merge_score_load); // add score to H H = vec_adds(Hup2,v_score); H = vec_subs(H,v_bias); // set H to max of H,E,F H = vec_max(H,E); H = vec_max(H,F); // Update highest score encountered this far v_maxscore = vec_max(v_maxscore,H); // PROLOGUE 6 // prefetch next residue k = db_sequence[6]; v_score = vec_perm(v_score_q1, v_score_load1, queue1_to_score); v_score_q1 = vec_perm(v_score_q2, v_score_load1, queue2_to_queue1); v_score_q2 = vec_perm(v_score_q3, v_score_load1, queue3_to_queue2); v_score_q3 = vec_perm(v_score_q4, v_score_load1, queue4_to_queue3); v_score_q4 = vec_perm(v_score_q5, v_score_load1, queue5_to_queue4); v_score_q5 = vec_perm(v_score_q5, v_score_load1, queue5_with_load); // prefetch score for next step v_score_load1 = vec_ld(16*k,query_profile_byte); // load values of F and H from previous row (one unit up) Fup = vec_ld(0, p); Hup2 = vec_ld(16, p); p += 32; // move ahead 32 bytes // shift into place so we have complete F and H vectors // that refer to the values one unit up from each cell // that we are currently working on. Fup = vec_sld(Fup,F,15); Hup2 = vec_sld(Hup2,H,15); // do the dynamic programming // update E value E = vec_subs(E,v_gapextend);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -