neon_helper.c
来自「xen虚拟机源代码安装包」· C语言 代码 · 共 1,458 行 · 第 1/3 页
C
1,458 行
val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; } else { val <<= shift; } return val;}#define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ if (tmp >= sizeof(src1) * 8 || tmp < -sizeof(src1) * 8) { \ dest = 0; \ } else if (tmp == -sizeof(src1) * 8) { \ dest = src1 >> (tmp - 1); \ } else if (tmp < 0) { \ dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0)NEON_VOP(rshl_u8, neon_u8, 4)NEON_VOP(rshl_u16, neon_u16, 2)NEON_VOP(rshl_u32, neon_u32, 1)#undef NEON_FNuint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop){ int8_t shift = (uint8_t)shiftop; if (shift >= 64 || shift < 64) { val = 0; } else if (shift == -64) { /* Rounding a 1-bit result just preserves that bit. */ val >>= 63; } if (shift < 0) { val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; val >>= -shift; } else { val <<= shift; } return val;}#define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ if (tmp >= sizeof(src1) * 8) { \ if (src1) { \ SET_QC(); \ dest = ~0; \ } else { \ dest = 0; \ } \ } else if (tmp <= -sizeof(src1) * 8) { \ dest = 0; \ } else if (tmp < 0) { \ dest = src1 >> -tmp; \ } else { \ dest = src1 << tmp; \ if ((dest >> tmp) != src1) { \ SET_QC(); \ dest = ~0; \ } \ }} while (0)NEON_VOP_ENV(qshl_u8, neon_u8, 4)NEON_VOP_ENV(qshl_u16, neon_u16, 2)NEON_VOP_ENV(qshl_u32, neon_u32, 1)#undef NEON_FNuint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop){ int8_t shift = (int8_t)shiftop; if (shift >= 64) { if (val) { val = ~(uint64_t)0; SET_QC(); } else { val = 0; } } else if (shift <= -64) { val = 0; } else if (shift < 0) { val >>= -shift; } else { uint64_t tmp = val; val <<= shift; if ((val >> shift) != tmp) { SET_QC(); val = ~(uint64_t)0; } } return val;}#define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ if (tmp >= sizeof(src1) * 8) { \ if (src1) \ SET_QC(); \ dest = src1 >> 31; \ } else if (tmp <= -sizeof(src1) * 8) { \ dest = src1 >> 31; \ } else if (tmp < 0) { \ dest = src1 >> -tmp; \ } else { \ dest = src1 << tmp; \ if ((dest >> tmp) != src1) { \ SET_QC(); \ dest = src2 >> 31; \ } \ }} while (0)NEON_VOP_ENV(qshl_s8, neon_s8, 4)NEON_VOP_ENV(qshl_s16, neon_s16, 2)NEON_VOP_ENV(qshl_s32, neon_s32, 1)#undef NEON_FNuint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop){ int8_t shift = (uint8_t)shiftop; int64_t val = valop; if (shift >= 64) { if (val) { SET_QC(); val = (val >> 63) & ~SIGNBIT64; } } else if (shift <= 64) { val >>= 63; } else if (shift < 0) { val >>= -shift; } else { int64_t tmp = val; val <<= shift; if ((val >> shift) != tmp) { SET_QC(); val = (tmp >> 63) ^ ~SIGNBIT64; } } return val;}/* FIXME: This is wrong. */#define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ if (tmp < 0) { \ dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ } else { \ dest = src1 << tmp; \ if ((dest >> tmp) != src1) { \ SET_QC(); \ dest = ~0; \ } \ }} while (0)NEON_VOP_ENV(qrshl_u8, neon_u8, 4)NEON_VOP_ENV(qrshl_u16, neon_u16, 2)NEON_VOP_ENV(qrshl_u32, neon_u32, 1)#undef NEON_FNuint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop){ int8_t shift = (int8_t)shiftop; if (shift < 0) { val = (val + (1 << (-1 - shift))) >> -shift; } else { \ uint64_t tmp = val; val <<= shift; if ((val >> shift) != tmp) { SET_QC(); val = ~0; } } return val;}#define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ if (tmp < 0) { \ dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ } else { \ dest = src1 << tmp; \ if ((dest >> tmp) != src1) { \ SET_QC(); \ dest = src1 >> 31; \ } \ }} while (0)NEON_VOP_ENV(qrshl_s8, neon_s8, 4)NEON_VOP_ENV(qrshl_s16, neon_s16, 2)NEON_VOP_ENV(qrshl_s32, neon_s32, 1)#undef NEON_FNuint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop){ int8_t shift = (uint8_t)shiftop; int64_t val = valop; if (shift < 0) { val = (val + (1 << (-1 - shift))) >> -shift; } else { int64_t tmp = val;; val <<= shift; if ((val >> shift) != tmp) { SET_QC(); val = tmp >> 31; } } return val;}uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b){ uint32_t mask; mask = (a ^ b) & 0x80808080u; a &= ~0x80808080u; b &= ~0x80808080u; return (a + b) ^ mask;}uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b){ uint32_t mask; mask = (a ^ b) & 0x80008000u; a &= ~0x80008000u; b &= ~0x80008000u; return (a + b) ^ mask;}#define NEON_FN(dest, src1, src2) dest = src1 + src2NEON_POP(padd_u8, neon_u8, 4)NEON_POP(padd_u16, neon_u16, 2)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = src1 - src2NEON_VOP(sub_u8, neon_u8, 4)NEON_VOP(sub_u16, neon_u16, 2)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = src1 * src2NEON_VOP(mul_u8, neon_u8, 4)NEON_VOP(mul_u16, neon_u16, 2)#undef NEON_FN/* Polynomial multiplication is like integer multiplication except the partial products are XORed, not added. */uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2){ uint32_t mask; uint32_t result; result = 0; while (op1) { mask = 0; if (op1 & 1) mask |= 0xff; if (op1 & (1 << 8)) mask |= (0xff << 8); if (op1 & (1 << 16)) mask |= (0xff << 16); if (op1 & (1 << 24)) mask |= (0xff << 24); result ^= op2 & mask; op1 = (op1 >> 1) & 0x7f7f7f7f; op2 = (op2 << 1) & 0xfefefefe; } return result;}#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0NEON_VOP(tst_u8, neon_u8, 4)NEON_VOP(tst_u16, neon_u16, 2)NEON_VOP(tst_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0NEON_VOP(ceq_u8, neon_u8, 4)NEON_VOP(ceq_u16, neon_u16, 2)NEON_VOP(ceq_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : srcNEON_VOP1(abs_s8, neon_s8, 4)NEON_VOP1(abs_s16, neon_s16, 2)#undef NEON_FN/* Count Leading Sign/Zero Bits. */static inline int do_clz8(uint8_t x){ int n; for (n = 8; x; n--) x >>= 1; return n;}static inline int do_clz16(uint16_t x){ int n; for (n = 16; x; n--) x >>= 1; return n;}#define NEON_FN(dest, src, dummy) dest = do_clz8(src)NEON_VOP1(clz_u8, neon_u8, 4)#undef NEON_FN#define NEON_FN(dest, src, dummy) dest = do_clz16(src)NEON_VOP1(clz_u16, neon_u16, 2)#undef NEON_FN#define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1NEON_VOP1(cls_s8, neon_s8, 4)#undef NEON_FN#define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1NEON_VOP1(cls_s16, neon_s16, 2)#undef NEON_FNuint32_t HELPER(neon_cls_s32)(uint32_t x){ int count; if ((int32_t)x < 0) x = ~x; for (count = 32; x; count--) x = x >> 1; return count - 1;}/* Bit count. */uint32_t HELPER(neon_cnt_u8)(uint32_t x){ x = (x & 0x55555555) + ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f); return x;}#define NEON_QDMULH16(dest, src1, src2, round) do { \ uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ SET_QC(); \ tmp = (tmp >> 31) ^ ~SIGNBIT; \ } \ tmp <<= 1; \ if (round) { \ int32_t old = tmp; \ tmp += 1 << 15; \ if ((int32_t)tmp < old) { \ SET_QC(); \ tmp = SIGNBIT - 1; \ } \ } \ dest = tmp >> 16; \ } while(0)#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0)NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1)NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)#undef NEON_FN#undef NEON_QDMULH16#define NEON_QDMULH32(dest, src1, src2, round) do { \ uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \ if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \ SET_QC(); \ tmp = (tmp >> 63) ^ ~SIGNBIT64; \ } else { \ tmp <<= 1; \ } \ if (round) { \ int64_t old = tmp; \ tmp += (int64_t)1 << 31; \ if ((int64_t)tmp < old) { \ SET_QC(); \ tmp = SIGNBIT64 - 1; \ } \ } \ dest = tmp >> 32; \ } while(0)#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0)NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1)NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)#undef NEON_FN#undef NEON_QDMULH32uint32_t HELPER(neon_narrow_u8)(uint64_t x){ return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u) | ((x >> 24) & 0xff000000u);}uint32_t HELPER(neon_narrow_u16)(uint64_t x){ return (x & 0xffffu) | ((x >> 16) & 0xffff0000u);}uint32_t HELPER(neon_narrow_high_u8)(uint64_t x){ return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);}uint32_t HELPER(neon_narrow_high_u16)(uint64_t x){ return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);}uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x){ x &= 0xff80ff80ff80ff80ull; x += 0x0080008000800080ull; return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);}uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x){ x &= 0xffff8000ffff8000ull; x += 0x0000800000008000ull; return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);}uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x){ uint16_t s; uint8_t d; uint32_t res = 0;#define SAT8(n) \ s = x >> n; \ if (s > 0xff) { \ d = 0xff; \ SET_QC(); \ } else { \ d = s; \ } \ res |= (uint32_t)d << (n / 2); SAT8(0); SAT8(16); SAT8(32); SAT8(48);#undef SAT8 return res;}uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x){ int16_t s; uint8_t d; uint32_t res = 0;#define SAT8(n) \ s = x >> n; \ if (s != (int8_t)s) { \ d = (s >> 15) ^ 0x7f; \ SET_QC(); \ } else { \ d = s; \ } \ res |= (uint32_t)d << (n / 2); SAT8(0); SAT8(16); SAT8(32); SAT8(48);#undef SAT8 return res;}uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x){ uint32_t high; uint32_t low; low = x; if (low > 0xffff) { low = 0xffff; SET_QC(); } high = x >> 32; if (high > 0xffff) { high = 0xffff; SET_QC(); } return low | (high << 16);}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?