📄 ops_sse.h
字号:
Reg *d, *s; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0);#if SHIFT == 1 d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2);#endif}void OPPROTO glue(op_pmaddwd, SUFFIX) (void){ int i; Reg *d, *s; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); for(i = 0; i < (2 << SHIFT); i++) { d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) + (int16_t)s->W(2*i+1) * (int16_t)d->W(2*i+1); } FORCE_RET();}#if SHIFT == 0static inline int abs1(int a){ if (a < 0) return -a; else return a;}#endifvoid OPPROTO glue(op_psadbw, SUFFIX) (void){ unsigned int val; Reg *d, *s; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); val = 0; val += abs1(d->B(0) - s->B(0)); val += abs1(d->B(1) - s->B(1)); val += abs1(d->B(2) - s->B(2)); val += abs1(d->B(3) - s->B(3)); val += abs1(d->B(4) - s->B(4)); val += abs1(d->B(5) - s->B(5)); val += abs1(d->B(6) - s->B(6)); val += abs1(d->B(7) - s->B(7)); d->Q(0) = val;#if SHIFT == 1 val = 0; val += abs1(d->B(8) - s->B(8)); val += abs1(d->B(9) - s->B(9)); val += abs1(d->B(10) - s->B(10)); val += abs1(d->B(11) - s->B(11)); val += abs1(d->B(12) - s->B(12)); val += abs1(d->B(13) - s->B(13)); val += abs1(d->B(14) - s->B(14)); val += abs1(d->B(15) - s->B(15)); d->Q(1) = val;#endif}void OPPROTO glue(op_maskmov, SUFFIX) (void){ int i; Reg *d, *s; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); for(i = 0; i < (8 << SHIFT); i++) { if (s->B(i) & 0x80) stb(A0 + i, d->B(i)); } FORCE_RET();}void OPPROTO glue(op_movl_mm_T0, SUFFIX) (void){ Reg *d; d = (Reg *)((char *)env + PARAM1); d->L(0) = T0; d->L(1) = 0;#if SHIFT == 1 d->Q(1) = 0;#endif}void OPPROTO glue(op_movl_T0_mm, SUFFIX) (void){ Reg *s; s = (Reg *)((char *)env + PARAM1); T0 = s->L(0);}#ifdef TARGET_X86_64void OPPROTO glue(op_movq_mm_T0, SUFFIX) (void){ Reg *d; d = (Reg *)((char *)env + PARAM1); d->Q(0) = T0;#if SHIFT == 1 d->Q(1) = 0;#endif}void OPPROTO glue(op_movq_T0_mm, SUFFIX) (void){ Reg *s; s = (Reg *)((char *)env + PARAM1); T0 = s->Q(0);}#endif#if SHIFT == 0void OPPROTO glue(op_pshufw, SUFFIX) (void){ Reg r, *d, *s; int order; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); order = PARAM3; r.W(0) = s->W(order & 3); r.W(1) = s->W((order >> 2) & 3); r.W(2) = s->W((order >> 4) & 3); r.W(3) = s->W((order >> 6) & 3); *d = r;}#elsevoid OPPROTO op_shufps(void){ Reg r, *d, *s; int order; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); order = PARAM3; r.L(0) = d->L(order & 3); r.L(1) = d->L((order >> 2) & 3); r.L(2) = s->L((order >> 4) & 3); r.L(3) = s->L((order >> 6) & 3); *d = r;}void OPPROTO op_shufpd(void){ Reg r, *d, *s; int order; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); order = PARAM3; r.Q(0) = d->Q(order & 1); r.Q(1) = s->Q((order >> 1) & 1); *d = r;}void OPPROTO glue(op_pshufd, SUFFIX) (void){ Reg r, *d, *s; int order; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); order = PARAM3; r.L(0) = s->L(order & 3); r.L(1) = s->L((order >> 2) & 3); r.L(2) = s->L((order >> 4) & 3); r.L(3) = s->L((order >> 6) & 3); *d = r;}void OPPROTO glue(op_pshuflw, SUFFIX) (void){ Reg r, *d, *s; int order; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); order = PARAM3; r.W(0) = s->W(order & 3); r.W(1) = s->W((order >> 2) & 3); r.W(2) = s->W((order >> 4) & 3); r.W(3) = s->W((order >> 6) & 3); r.Q(1) = s->Q(1); *d = r;}void OPPROTO glue(op_pshufhw, SUFFIX) (void){ Reg r, *d, *s; int order; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); order = PARAM3; r.Q(0) = s->Q(0); r.W(4) = s->W(4 + (order & 3)); r.W(5) = s->W(4 + ((order >> 2) & 3)); r.W(6) = s->W(4 + ((order >> 4) & 3)); r.W(7) = s->W(4 + ((order >> 6) & 3)); *d = r;}#endif#if SHIFT == 1/* FPU ops *//* XXX: not accurate */#define SSE_OP_S(name, F)\void OPPROTO op_ ## name ## ps (void)\{\ Reg *d, *s;\ d = (Reg *)((char *)env + PARAM1);\ s = (Reg *)((char *)env + PARAM2);\ d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\ d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\ d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\}\\void OPPROTO op_ ## name ## ss (void)\{\ Reg *d, *s;\ d = (Reg *)((char *)env + PARAM1);\ s = (Reg *)((char *)env + PARAM2);\ d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\}\void OPPROTO op_ ## name ## pd (void)\{\ Reg *d, *s;\ d = (Reg *)((char *)env + PARAM1);\ s = (Reg *)((char *)env + PARAM2);\ d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\}\\void OPPROTO op_ ## name ## sd (void)\{\ Reg *d, *s;\ d = (Reg *)((char *)env + PARAM1);\ s = (Reg *)((char *)env + PARAM2);\ d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\}#define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status)#define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status)#define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status)#define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status)#define FPU_MIN(size, a, b) (a) < (b) ? (a) : (b)#define FPU_MAX(size, a, b) (a) > (b) ? (a) : (b)#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)SSE_OP_S(add, FPU_ADD)SSE_OP_S(sub, FPU_SUB)SSE_OP_S(mul, FPU_MUL)SSE_OP_S(div, FPU_DIV)SSE_OP_S(min, FPU_MIN)SSE_OP_S(max, FPU_MAX)SSE_OP_S(sqrt, FPU_SQRT)/* float to float conversions */void OPPROTO op_cvtps2pd(void){ float32 s0, s1; Reg *d, *s; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); s0 = s->XMM_S(0); s1 = s->XMM_S(1); d->XMM_D(0) = float32_to_float64(s0, &env->sse_status); d->XMM_D(1) = float32_to_float64(s1, &env->sse_status);}void OPPROTO op_cvtpd2ps(void){ Reg *d, *s; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status); d->Q(1) = 0;}void OPPROTO op_cvtss2sd(void){ Reg *d, *s; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status);}void OPPROTO op_cvtsd2ss(void){ Reg *d, *s; d = (Reg *)((char *)env + PARAM1); s = (Reg *)((char *)env + PARAM2); d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);}/* integer to float */void OPPROTO op_cvtdq2ps(void){ XMMReg *d = (XMMReg *)((char *)env + PARAM1); XMMReg *s = (XMMReg *)((char *)env + PARAM2); d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status); d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status); d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status); d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status);}void OPPROTO op_cvtdq2pd(void){ XMMReg *d = (XMMReg *)((char *)env + PARAM1); XMMReg *s = (XMMReg *)((char *)env + PARAM2); int32_t l0, l1; l0 = (int32_t)s->XMM_L(0); l1 = (int32_t)s->XMM_L(1); d->XMM_D(0) = int32_to_float64(l0, &env->sse_status); d->XMM_D(1) = int32_to_float64(l1, &env->sse_status);}void OPPROTO op_cvtpi2ps(void){ XMMReg *d = (Reg *)((char *)env + PARAM1); MMXReg *s = (MMXReg *)((char *)env + PARAM2); d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status); d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status);}void OPPROTO op_cvtpi2pd(void){ XMMReg *d = (Reg *)((char *)env + PARAM1); MMXReg *s = (MMXReg *)((char *)env + PARAM2); d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status); d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status);}void OPPROTO op_cvtsi2ss(void){ XMMReg *d = (Reg *)((char *)env + PARAM1); d->XMM_S(0) = int32_to_float32(T0, &env->sse_status);}void OPPROTO op_cvtsi2sd(void){ XMMReg *d = (Reg *)((char *)env + PARAM1); d->XMM_D(0) = int32_to_float64(T0, &env->sse_status);}#ifdef TARGET_X86_64void OPPROTO op_cvtsq2ss(void){ XMMReg *d = (Reg *)((char *)env + PARAM1); d->XMM_S(0) = int64_to_float32(T0, &env->sse_status);}void OPPROTO op_cvtsq2sd(void){ XMMReg *d = (Reg *)((char *)env + PARAM1); d->XMM_D(0) = int64_to_float64(T0, &env->sse_status);}#endif/* float to integer */void OPPROTO op_cvtps2dq(void){ XMMReg *d = (XMMReg *)((char *)env + PARAM1); XMMReg *s = (XMMReg *)((char *)env + PARAM2); d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status); d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status);}void OPPROTO op_cvtpd2dq(void){ XMMReg *d = (XMMReg *)((char *)env + PARAM1); XMMReg *s = (XMMReg *)((char *)env + PARAM2); d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); d->XMM_Q(1) = 0;}void OPPROTO op_cvtps2pi(void){ MMXReg *d = (MMXReg *)((char *)env + PARAM1); XMMReg *s = (XMMReg *)((char *)env + PARAM2); d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);}void OPPROTO op_cvtpd2pi(void){ MMXReg *d = (MMXReg *)((char *)env + PARAM1); XMMReg *s = (XMMReg *)((char *)env + PARAM2); d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);}void OPPROTO op_cvtss2si(void){ XMMReg *s = (XMMReg *)((char *)env + PARAM1); T0 = float32_to_int32(s->XMM_S(0), &env->sse_status);}void OPPROTO op_cvtsd2si(void){ XMMReg *s = (XMMReg *)((char *)env + PARAM1); T0 = float64_to_int32(s->XMM_D(0), &env->sse_status);}#ifdef TARGET_X86_64void OPPROTO op_cvtss2sq(void){ XMMReg *s = (XMMReg *)((char *)env + PARAM1); T0 = float32_to_int64(s->XMM_S(0), &env->sse_status);}void OPPROTO op_cvtsd2sq(void){ XMMReg *s = (XMMReg *)((char *)env + PARAM1); T0 = float64_to_int64(s->XMM_D(0), &env->sse_status);}#endif/* float to integer truncated */void OPPROTO op_cvttps2dq(void){ XMMReg *d = (XMMReg *)((char *)env + PARAM1); XMMReg *s = (XMMReg *)((char *)env + PARAM2); d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status); d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status);}void OPPROTO op_cvttpd2dq(void){ XMMReg *d = (XMMReg *)((char *)env + PARAM1); XMMReg *s = (XMMReg *)((char *)env + PARAM2); d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); d->XMM_Q(1) = 0;}void OPPROTO op_cvttps2pi(void){ MMXReg *d = (MMXReg *)((char *)env + PARAM1); XMMReg *s = (XMMReg *)((char *)env + PARAM2); d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);}void OPPROTO op_cvttpd2pi(void){ MMXReg *d = (MMXReg *)((char *)env + PARAM1); XMMReg *s = (XMMReg *)((char *)env + PARAM2); d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);}void OPPROTO op_cvttss2si(void){ XMMReg *s = (XMMReg *)((char *)env + PARAM1); T0 = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);}void OPPROTO op_cvttsd2si(void)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -