📄 sse2mmx.c
字号:
#include "sse2mmx.h"
#include "spc.h"

/*
 * SSE2 helper routines written as GCC inline assembly (AT&T syntax).
 *
 * The routines communicate through the XMM registers themselves: one call
 * leaves a value in a register and a later call picks it up.  Register use
 * as visible in this file:
 *   xmm2        TA constant (float lanes), set by load2TA()
 *   xmm4, xmm5  "message" values / float temporaries
 *   xmm6, xmm7  "product" values; xmm6 also holds the upper limit for limit()
 *   xmm5        lower limit for limit()
 *   xmm0        result of sub(), input/output of limit()
 *   xmm0, xmm1, xmm3  scratch
 *
 * Implementation notes:
 *  - Every sequence is a single `__asm__ volatile` statement so the compiler
 *    cannot drop it or schedule other code between its instructions.
 *  - Statements that touch memory carry a "memory" clobber: the operand only
 *    names the first element, but 16 bytes are read or written.
 *  - XMM registers are deliberately NOT listed as clobbers.  The values must
 *    survive the return, and on ABIs where some XMM registers are
 *    callee-saved a clobber would make the compiler restore them on exit.
 *  - NOTE(review): nothing here stops the compiler from using XMM registers
 *    for its own code between two calls; callers must account for that.
 */

/* Copies xmm2 (expected to hold TA, see load2TA) into xmm7 and xmm6. */
void init67TA(void)
{
    __asm__ volatile(
        "movdqa %xmm2, %xmm7\n\t"
        "movdqa %xmm2, %xmm6");
}

/*
 * Table of TA values loaded by load2TA().  TA is not defined in this file
 * (presumably it comes from one of the included headers).  Aligned to
 * 16 bytes because it is read with movdqa.
 */
float a[8] __attribute__((aligned(16))) = { TA, TA, TA, TA, TA, TA, TA, TA };

/* Loads the first 16 bytes (four floats) of the TA table into xmm2. */
void load2TA(void)
{
    __asm__ volatile("movdqa %0, %%xmm2" : : "m"(a[0]) : "memory");
}

/*
 * Adds the eight 16-bit words at `a` to xmm7 (wrapping paddw); result in xmm7.
 * `a` must be 16-byte aligned (movdqa).  Overwrites xmm0.
 */
void add(short int *a)
{
    __asm__ volatile(
        "movdqa %0, %%xmm0\n\t"
        "paddw %%xmm0, %%xmm7"
        : : "m"(a[0]) : "memory");
}

/* Loads the eight 16-bit words at `a` into xmm7 (unaligned load). */
void load7(short int *a)
{
    __asm__ volatile("movdqu %0, %%xmm7" : : "m"(a[0]) : "memory");
}

/*
 * Loads eight 16-bit words from `a` and converts them to floats:
 * words 0-3 end up in xmm4, words 4-7 in xmm5.
 * `a` must be 16-byte aligned (movdqa).  Overwrites xmm3.
 */
void load45data(short int *a)
{
    __asm__ volatile(
        "movdqa %0, %%xmm3\n\t"
        "movdqa %%xmm3, %%xmm4\n\t"
        "movdqa %%xmm4, %%xmm5\n\t"
        /* xmm3 becomes the per-word sign mask used as the high halves */
        "psraw $15, %%xmm3\n\t"
        "punpcklwd %%xmm3, %%xmm4\n\t"
        "punpckhwd %%xmm3, %%xmm5\n\t"
        "cvtdq2ps %%xmm4, %%xmm4\n\t"
        "cvtdq2ps %%xmm5, %%xmm5"
        : : "m"(a[0]) : "memory");
}

/*
 * Loads eight 16-bit words from `a` and converts them to floats:
 * words 0-3 end up in xmm6, words 4-7 in xmm7.
 * Unaligned load.  Overwrites xmm3.
 */
void load67data(short int *a)
{
    __asm__ volatile(
        "movdqu %0, %%xmm3\n\t"
        "movdqu %%xmm3, %%xmm6\n\t"
        /* Fixed: this copy used to read xmm4, so xmm7 was built from
         * whatever xmm4 happened to hold instead of the loaded words. */
        "movdqu %%xmm3, %%xmm7\n\t"
        /* xmm3 becomes the per-word sign mask used as the high halves */
        "psraw $15, %%xmm3\n\t"
        "punpcklwd %%xmm3, %%xmm6\n\t"
        "punpckhwd %%xmm3, %%xmm7\n\t"
        "cvtdq2ps %%xmm6, %%xmm6\n\t"
        "cvtdq2ps %%xmm7, %%xmm7"
        : : "m"(a[0]) : "memory");
}

/*
 * Computes prod * msg / TA on float lanes:
 *   xmm6 = xmm6 * xmm4 / xmm2
 *   xmm7 = xmm7 * xmm5 / xmm2
 */
void mulfloat(void)
{
    __asm__ volatile(
        "mulps %xmm4, %xmm6\n\t"
        "mulps %xmm5, %xmm7\n\t"
        "divps %xmm2, %xmm7\n\t"
        "divps %xmm2, %xmm6");
}

/*
 * Stores xmm4 to `a` and xmm5 to `b` (16 bytes each).
 * Both pointers must be 16-byte aligned (movdqa).
 */
void save45temp(float *a, float *b)
{
    /* The destinations are outputs; they were previously declared as inputs. */
    __asm__ volatile(
        "movdqa %%xmm4, %0\n\t"
        "movdqa %%xmm5, %1"
        : "=m"(a[0]), "=m"(b[0])
        :
        : "memory");
}

/* Stores xmm6 to `a` and xmm7 to `b` (16 bytes each, unaligned stores). */
void save67temp(float *a, float *b)
{
    /* The destinations are outputs; they were previously declared as inputs. */
    __asm__ volatile(
        "movdqu %%xmm6, %0\n\t"
        "movdqu %%xmm7, %1"
        : "=m"(a[0]), "=m"(b[0])
        :
        : "memory");
}

/*
 * Computes prod * TA / message on float lanes:
 *   xmm4 = xmm6 * xmm2 / a[0..3]
 *   xmm5 = xmm7 * xmm2 / b[0..3]
 * Both pointers must be 16-byte aligned (movdqa).  Overwrites xmm1 and xmm3.
 */
void divide(float *a, float *b)
{
    __asm__ volatile(
        "movdqa %%xmm6, %%xmm4\n\t"
        "movdqa %%xmm7, %%xmm5\n\t"
        "mulps %%xmm2, %%xmm4\n\t"
        "mulps %%xmm2, %%xmm5\n\t"
        "movdqa %0, %%xmm1\n\t"
        "movdqa %1, %%xmm3\n\t"
        "divps %%xmm1, %%xmm4\n\t"
        "divps %%xmm3, %%xmm5"
        : : "m"(a[0]), "m"(b[0]) : "memory");
}

/*
 * Converts the four floats in xmm4 and the four in xmm5 to 32-bit integers,
 * packs them with signed saturation into eight 16-bit words (xmm4 lanes
 * first) and stores them at `a`.
 * `a` must be 16-byte aligned (movdqa).  xmm4 and xmm5 are overwritten.
 */
void loadmessage45(short int *a)
{
    __asm__ volatile(
        "cvtps2dq %%xmm4, %%xmm4\n\t"
        "cvtps2dq %%xmm5, %%xmm5\n\t"
        "packssdw %%xmm5, %%xmm4\n\t"
        "movdqa %%xmm4, %0"
        : "=m"(a[0])
        :
        : "memory");
}

/*
 * Converts the four floats in xmm6 and the four in xmm7 to 32-bit integers,
 * packs them with signed saturation into eight 16-bit words (xmm6 lanes
 * first) and stores them at `a`.
 * `a` must be 16-byte aligned (movdqa).  xmm6 and xmm7 are overwritten.
 */
void loadmessage67(short int *a)
{
    __asm__ volatile(
        "cvtps2dq %%xmm6, %%xmm6\n\t"
        "cvtps2dq %%xmm7, %%xmm7\n\t"
        "packssdw %%xmm7, %%xmm6\n\t"
        "movdqa %%xmm6, %0"
        : "=m"(a[0])
        :
        : "memory");
}

/*
 * Multiplies xmm7 word-wise by the eight 16-bit words at `a`, keeping the low
 * 16 bits of each product (pmullw); result in xmm7.
 * `a` is used directly as the instruction's memory operand, so it must be
 * 16-byte aligned.
 */
void mul(short int *a)
{
    __asm__ volatile("pmullw %0, %%xmm7" : : "m"(a[0]) : "memory");
}

/*
 * Computes xmm0 = xmm7 - a word-wise (wrapping psubw); xmm7 is left
 * unchanged.  Unaligned load.  Overwrites xmm1.
 */
void sub(short int *a)
{
    __asm__ volatile(
        "movdqu %0, %%xmm1\n\t"
        "movdqu %%xmm7, %%xmm0\n\t"
        "psubw %%xmm1, %%xmm0"
        : : "m"(a[0]) : "memory");
}

/* Stores the eight 16-bit words of xmm0 at `a` (unaligned store). */
void retrieve0(short int *a)
{
    __asm__ volatile("movdqu %%xmm0, %0" : "=m"(a[0]) : : "memory");
}

/* Stores the eight 16-bit words of xmm7 at `a` (unaligned store). */
void retrieve7(short int *a)
{
    __asm__ volatile("movdqu %%xmm7, %0" : "=m"(a[0]) : : "memory");
}

/* Staging buffers for init(): init_a holds -x, init_b holds x, eight copies each. */
short int init_a[8];
short int init_b[8];

/*
 * Broadcasts -x into all eight words of xmm5 and x into all eight words of
 * xmm6; these are the bounds used by limit().
 */
void init(short int x)
{
    int i;

    for (i = 0; i < 8; i++) {
        init_a[i] = (short int)-x;
        init_b[i] = x;
    }

    /* The "memory" clobber makes sure all sixteen stores above are
     * performed before the loads below read the buffers. */
    __asm__ volatile(
        "movdqu %0, %%xmm5\n\t"
        "movdqu %1, %%xmm6"
        : : "m"(init_a[0]), "m"(init_b[0]) : "memory");
}

/*
 * Clamps each signed 16-bit word of xmm0:
 *   xmm0 = max(min(xmm0, xmm6), xmm5)
 * i.e. into [xmm5, xmm6] as set up by init().
 */
void limit(void)
{
    __asm__ volatile(
        "pminsw %xmm6, %xmm0\n\t"
        "pmaxsw %xmm5, %xmm0");
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -