📄 ops_sse.h
字号:
for(i = 0; i < (2 << SHIFT); i++) { d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) + (int16_t)s->W(2*i+1) * (int16_t)d->W(2*i+1); } FORCE_RET();}#if SHIFT == 0static inline int abs1(int a){ if (a < 0) return -a; else return a;}#endifvoid glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s){ unsigned int val; val = 0; val += abs1(d->B(0) - s->B(0)); val += abs1(d->B(1) - s->B(1)); val += abs1(d->B(2) - s->B(2)); val += abs1(d->B(3) - s->B(3)); val += abs1(d->B(4) - s->B(4)); val += abs1(d->B(5) - s->B(5)); val += abs1(d->B(6) - s->B(6)); val += abs1(d->B(7) - s->B(7)); d->Q(0) = val;#if SHIFT == 1 val = 0; val += abs1(d->B(8) - s->B(8)); val += abs1(d->B(9) - s->B(9)); val += abs1(d->B(10) - s->B(10)); val += abs1(d->B(11) - s->B(11)); val += abs1(d->B(12) - s->B(12)); val += abs1(d->B(13) - s->B(13)); val += abs1(d->B(14) - s->B(14)); val += abs1(d->B(15) - s->B(15)); d->Q(1) = val;#endif}void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s){ int i; for(i = 0; i < (8 << SHIFT); i++) { if (s->B(i) & 0x80) stb(A0 + i, d->B(i)); } FORCE_RET();}void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val){ d->L(0) = val; d->L(1) = 0;#if SHIFT == 1 d->Q(1) = 0;#endif}#ifdef TARGET_X86_64void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val){ d->Q(0) = val;#if SHIFT == 1 d->Q(1) = 0;#endif}#endif#if SHIFT == 0void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order){ Reg r; r.W(0) = s->W(order & 3); r.W(1) = s->W((order >> 2) & 3); r.W(2) = s->W((order >> 4) & 3); r.W(3) = s->W((order >> 6) & 3); *d = r;}#elsevoid helper_shufps(Reg *d, Reg *s, int order){ Reg r; r.L(0) = d->L(order & 3); r.L(1) = d->L((order >> 2) & 3); r.L(2) = s->L((order >> 4) & 3); r.L(3) = s->L((order >> 6) & 3); *d = r;}void helper_shufpd(Reg *d, Reg *s, int order){ Reg r; r.Q(0) = d->Q(order & 1); r.Q(1) = s->Q((order >> 1) & 1); *d = r;}void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order){ Reg r; r.L(0) = s->L(order & 3); r.L(1) = s->L((order >> 2) & 3); r.L(2) = s->L((order >> 4) & 3); r.L(3) = s->L((order >> 6) & 3); *d = r;}void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order){ Reg r; r.W(0) = s->W(order & 3); r.W(1) = s->W((order >> 2) & 3); r.W(2) = s->W((order >> 4) & 3); r.W(3) = s->W((order >> 6) & 3); r.Q(1) = s->Q(1); *d = r;}void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order){ Reg r; r.Q(0) = s->Q(0); r.W(4) = s->W(4 + (order & 3)); r.W(5) = s->W(4 + ((order >> 2) & 3)); r.W(6) = s->W(4 + ((order >> 4) & 3)); r.W(7) = s->W(4 + ((order >> 6) & 3)); *d = r;}#endif#if SHIFT == 1/* FPU ops *//* XXX: not accurate */#define SSE_HELPER_S(name, F)\void helper_ ## name ## ps (Reg *d, Reg *s)\{\ d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\ d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\ d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\}\\void helper_ ## name ## ss (Reg *d, Reg *s)\{\ d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\}\void helper_ ## name ## pd (Reg *d, Reg *s)\{\ d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\}\\void helper_ ## name ## sd (Reg *d, Reg *s)\{\ d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\}#define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status)#define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status)#define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status)#define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status)#define FPU_MIN(size, a, b) (a) < (b) ? (a) : (b)#define FPU_MAX(size, a, b) (a) > (b) ? (a) : (b)#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)SSE_HELPER_S(add, FPU_ADD)SSE_HELPER_S(sub, FPU_SUB)SSE_HELPER_S(mul, FPU_MUL)SSE_HELPER_S(div, FPU_DIV)SSE_HELPER_S(min, FPU_MIN)SSE_HELPER_S(max, FPU_MAX)SSE_HELPER_S(sqrt, FPU_SQRT)/* float to float conversions */void helper_cvtps2pd(Reg *d, Reg *s){ float32 s0, s1; s0 = s->XMM_S(0); s1 = s->XMM_S(1); d->XMM_D(0) = float32_to_float64(s0, &env->sse_status); d->XMM_D(1) = float32_to_float64(s1, &env->sse_status);}void helper_cvtpd2ps(Reg *d, Reg *s){ d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status); d->Q(1) = 0;}void helper_cvtss2sd(Reg *d, Reg *s){ d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status);}void helper_cvtsd2ss(Reg *d, Reg *s){ d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);}/* integer to float */void helper_cvtdq2ps(Reg *d, Reg *s){ d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status); d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status); d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status); d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status);}void helper_cvtdq2pd(Reg *d, Reg *s){ int32_t l0, l1; l0 = (int32_t)s->XMM_L(0); l1 = (int32_t)s->XMM_L(1); d->XMM_D(0) = int32_to_float64(l0, &env->sse_status); d->XMM_D(1) = int32_to_float64(l1, &env->sse_status);}void helper_cvtpi2ps(XMMReg *d, MMXReg *s){ d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status); d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status);}void helper_cvtpi2pd(XMMReg *d, MMXReg *s){ d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status); d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status);}void helper_cvtsi2ss(XMMReg *d, uint32_t val){ d->XMM_S(0) = int32_to_float32(val, &env->sse_status);}void helper_cvtsi2sd(XMMReg *d, uint32_t val){ d->XMM_D(0) = int32_to_float64(val, &env->sse_status);}#ifdef TARGET_X86_64void helper_cvtsq2ss(XMMReg *d, uint64_t val){ d->XMM_S(0) = int64_to_float32(val, &env->sse_status);}void helper_cvtsq2sd(XMMReg *d, uint64_t val){ d->XMM_D(0) = int64_to_float64(val, &env->sse_status);}#endif/* float to integer */void helper_cvtps2dq(XMMReg *d, XMMReg *s){ d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status); d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status);}void helper_cvtpd2dq(XMMReg *d, XMMReg *s){ d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); d->XMM_Q(1) = 0;}void helper_cvtps2pi(MMXReg *d, XMMReg *s){ d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);}void helper_cvtpd2pi(MMXReg *d, XMMReg *s){ d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);}int32_t helper_cvtss2si(XMMReg *s){ return float32_to_int32(s->XMM_S(0), &env->sse_status);}int32_t helper_cvtsd2si(XMMReg *s){ return float64_to_int32(s->XMM_D(0), &env->sse_status);}#ifdef TARGET_X86_64int64_t helper_cvtss2sq(XMMReg *s){ return float32_to_int64(s->XMM_S(0), &env->sse_status);}int64_t helper_cvtsd2sq(XMMReg *s){ return float64_to_int64(s->XMM_D(0), &env->sse_status);}#endif/* float to integer truncated */void helper_cvttps2dq(XMMReg *d, XMMReg *s){ d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status); d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status);}void helper_cvttpd2dq(XMMReg *d, XMMReg *s){ d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); d->XMM_Q(1) = 0;}void helper_cvttps2pi(MMXReg *d, XMMReg *s){ d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);}void helper_cvttpd2pi(MMXReg *d, XMMReg *s){ d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);}int32_t helper_cvttss2si(XMMReg *s){ return float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);}int32_t helper_cvttsd2si(XMMReg *s){ return float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);}#ifdef TARGET_X86_64int64_t helper_cvttss2sq(XMMReg *s){ return float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);}int64_t helper_cvttsd2sq(XMMReg *s){ return float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);}#endifvoid helper_rsqrtps(XMMReg *d, XMMReg *s){ d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); d->XMM_S(1) = approx_rsqrt(s->XMM_S(1)); d->XMM_S(2) = approx_rsqrt(s->XMM_S(2)); d->XMM_S(3) = approx_rsqrt(s->XMM_S(3));}void helper_rsqrtss(XMMReg *d, XMMReg *s){ d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));}void helper_rcpps(XMMReg *d, XMMReg *s){ d->XMM_S(0) = approx_rcp(s->XMM_S(0)); d->XMM_S(1) = approx_rcp(s->XMM_S(1)); d->XMM_S(2) = approx_rcp(s->XMM_S(2)); d->XMM_S(3) = approx_rcp(s->XMM_S(3));}void helper_rcpss(XMMReg *d, XMMReg *s){ d->XMM_S(0) = approx_rcp(s->XMM_S(0));}void helper_haddps(XMMReg *d, XMMReg *s){ XMMReg r; r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1); r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3); r.XMM_S(2) = s->XMM_S(0) + s->XMM_S(1); r.XMM_S(3) = s->XMM_S(2) + s->XMM_S(3); *d = r;}void helper_haddpd(XMMReg *d, XMMReg *s){ XMMReg r; r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1); r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1); *d = r;}void helper_hsubps(XMMReg *d, XMMReg *s){ XMMReg r; r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1); r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3); r.XMM_S(2) = s->XMM_S(0) - s->XMM_S(1); r.XMM_S(3) = s->XMM_S(2) - s->XMM_S(3); *d = r;}void helper_hsubpd(XMMReg *d, XMMReg *s){ XMMReg r; r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1); r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1); *d = r;}void helper_addsubps(XMMReg *d, XMMReg *s){ d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0); d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1); d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2); d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -