/*
 * File: neon_helper.c
 * Source: "Xen virtual machine source code installation package".
 * Language: C; 1,458 lines in total; this listing is page 1 of 3.
 */
/* * ARM NEON vector operations. * * Copyright (c) 2007, 2008 CodeSourcery. * Written by Paul Brook * * This code is licenced under the GNU GPL v2. */#include <stdlib.h>#include <stdio.h>#include "cpu.h"#include "exec-all.h"#include "helpers.h"#define SIGNBIT (uint32_t)0x80000000#define SIGNBIT64 ((uint64_t)1 << 63)#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] = CPSR_Qstatic float_status neon_float_status;#define NFS &neon_float_status/* Helper routines to perform bitwise copies between float and int. */static inline float32 vfp_itos(uint32_t i){ union { uint32_t i; float32 s; } v; v.i = i; return v.s;}static inline uint32_t vfp_stoi(float32 s){ union { uint32_t i; float32 s; } v; v.s = s; return v.i;}#define NEON_TYPE1(name, type) \typedef struct \{ \ type v1; \} neon_##name;#ifdef WORDS_BIGENDIAN#define NEON_TYPE2(name, type) \typedef struct \{ \ type v2; \ type v1; \} neon_##name;#define NEON_TYPE4(name, type) \typedef struct \{ \ type v4; \ type v3; \ type v2; \ type v1; \} neon_##name;#else#define NEON_TYPE2(name, type) \typedef struct \{ \ type v1; \ type v2; \} neon_##name;#define NEON_TYPE4(name, type) \typedef struct \{ \ type v1; \ type v2; \ type v3; \ type v4; \} neon_##name;#endifNEON_TYPE4(s8, int8_t)NEON_TYPE4(u8, uint8_t)NEON_TYPE2(s16, int16_t)NEON_TYPE2(u16, uint16_t)NEON_TYPE1(s32, int32_t)NEON_TYPE1(u32, uint32_t)#undef NEON_TYPE4#undef NEON_TYPE2#undef NEON_TYPE1/* Copy from a uint32_t to a vector structure type. */#define NEON_UNPACK(vtype, dest, val) do { \ union { \ vtype v; \ uint32_t i; \ } conv_u; \ conv_u.i = (val); \ dest = conv_u.v; \ } while(0)/* Copy from a vector structure type to a uint32_t. 
*/#define NEON_PACK(vtype, dest, val) do { \ union { \ vtype v; \ uint32_t i; \ } conv_u; \ conv_u.v = (val); \ dest = conv_u.i; \ } while(0)#define NEON_DO1 \ NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);#define NEON_DO2 \ NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);#define NEON_DO4 \ NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); \ NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \ NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);#define NEON_VOP_BODY(vtype, n) \{ \ uint32_t res; \ vtype vsrc1; \ vtype vsrc2; \ vtype vdest; \ NEON_UNPACK(vtype, vsrc1, arg1); \ NEON_UNPACK(vtype, vsrc2, arg2); \ NEON_DO##n; \ NEON_PACK(vtype, res, vdest); \ return res; \}#define NEON_VOP(name, vtype, n) \uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \NEON_VOP_BODY(vtype, n)#define NEON_VOP_ENV(name, vtype, n) \uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \NEON_VOP_BODY(vtype, n)/* Pairwise operations. *//* For 32-bit elements each segment only contains a single element, so the elementwise and pairwise operations are the same. */#define NEON_PDO2 \ NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);#define NEON_PDO4 \ NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \ NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \ NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4); \#define NEON_POP(name, vtype, n) \uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \{ \ uint32_t res; \ vtype vsrc1; \ vtype vsrc2; \ vtype vdest; \ NEON_UNPACK(vtype, vsrc1, arg1); \ NEON_UNPACK(vtype, vsrc2, arg2); \ NEON_PDO##n; \ NEON_PACK(vtype, res, vdest); \ return res; \}/* Unary operators. 
*/#define NEON_VOP1(name, vtype, n) \uint32_t HELPER(glue(neon_,name))(uint32_t arg) \{ \ vtype vsrc1; \ vtype vdest; \ NEON_UNPACK(vtype, vsrc1, arg); \ NEON_DO##n; \ NEON_PACK(vtype, arg, vdest); \ return arg; \}#define NEON_USAT(dest, src1, src2, type) do { \ uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ if (tmp != (type)tmp) { \ SET_QC(); \ dest = ~0; \ } else { \ dest = tmp; \ }} while(0)#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)NEON_VOP_ENV(qadd_u8, neon_u8, 4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)NEON_VOP_ENV(qadd_u16, neon_u16, 2)#undef NEON_FN#undef NEON_USAT#define NEON_SSAT(dest, src1, src2, type) do { \ int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ if (tmp != (type)tmp) { \ SET_QC(); \ if (src2 > 0) { \ tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ } else { \ tmp = 1 << (sizeof(type) * 8 - 1); \ } \ } \ dest = tmp; \ } while(0)#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)NEON_VOP_ENV(qadd_s8, neon_s8, 4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)NEON_VOP_ENV(qadd_s16, neon_s16, 2)#undef NEON_FN#undef NEON_SSAT#define NEON_USAT(dest, src1, src2, type) do { \ uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ if (tmp != (type)tmp) { \ SET_QC(); \ dest = 0; \ } else { \ dest = tmp; \ }} while(0)#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)NEON_VOP_ENV(qsub_u8, neon_u8, 4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)NEON_VOP_ENV(qsub_u16, neon_u16, 2)#undef NEON_FN#undef NEON_USAT#define NEON_SSAT(dest, src1, src2, type) do { \ int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ if (tmp != (type)tmp) { \ SET_QC(); \ if (src2 < 0) { \ tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ } else { \ tmp = 1 << (sizeof(type) * 8 - 1); \ } \ } \ dest = tmp; \ } while(0)#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)NEON_VOP_ENV(qsub_s8, neon_s8, 
4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)NEON_VOP_ENV(qsub_s16, neon_s16, 2)#undef NEON_FN#undef NEON_SSAT#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1NEON_VOP(hadd_s8, neon_s8, 4)NEON_VOP(hadd_u8, neon_u8, 4)NEON_VOP(hadd_s16, neon_s16, 2)NEON_VOP(hadd_u16, neon_u16, 2)#undef NEON_FNint32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2){ int32_t dest; dest = (src1 >> 1) + (src2 >> 1); if (src1 & src2 & 1) dest++; return dest;}uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2){ uint32_t dest; dest = (src1 >> 1) + (src2 >> 1); if (src1 & src2 & 1) dest++; return dest;}#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1NEON_VOP(rhadd_s8, neon_s8, 4)NEON_VOP(rhadd_u8, neon_u8, 4)NEON_VOP(rhadd_s16, neon_s16, 2)NEON_VOP(rhadd_u16, neon_u16, 2)#undef NEON_FNint32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2){ int32_t dest; dest = (src1 >> 1) + (src2 >> 1); if ((src1 | src2) & 1) dest++; return dest;}uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2){ uint32_t dest; dest = (src1 >> 1) + (src2 >> 1); if ((src1 | src2) & 1) dest++; return dest;}#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1NEON_VOP(hsub_s8, neon_s8, 4)NEON_VOP(hsub_u8, neon_u8, 4)NEON_VOP(hsub_s16, neon_s16, 2)NEON_VOP(hsub_u16, neon_u16, 2)#undef NEON_FNint32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2){ int32_t dest; dest = (src1 >> 1) - (src2 >> 1); if ((~src1) & src2 & 1) dest--; return dest;}uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2){ uint32_t dest; dest = (src1 >> 1) - (src2 >> 1); if ((~src1) & src2 & 1) dest--; return dest;}#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0NEON_VOP(cgt_s8, neon_s8, 4)NEON_VOP(cgt_u8, neon_u8, 4)NEON_VOP(cgt_s16, neon_s16, 2)NEON_VOP(cgt_u16, neon_u16, 2)NEON_VOP(cgt_s32, neon_s32, 1)NEON_VOP(cgt_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? 
~0 : 0NEON_VOP(cge_s8, neon_s8, 4)NEON_VOP(cge_u8, neon_u8, 4)NEON_VOP(cge_s16, neon_s16, 2)NEON_VOP(cge_u16, neon_u16, 2)NEON_VOP(cge_s32, neon_s32, 1)NEON_VOP(cge_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2NEON_VOP(min_s8, neon_s8, 4)NEON_VOP(min_u8, neon_u8, 4)NEON_VOP(min_s16, neon_s16, 2)NEON_VOP(min_u16, neon_u16, 2)NEON_VOP(min_s32, neon_s32, 1)NEON_VOP(min_u32, neon_u32, 1)NEON_POP(pmin_s8, neon_s8, 4)NEON_POP(pmin_u8, neon_u8, 4)NEON_POP(pmin_s16, neon_s16, 2)NEON_POP(pmin_u16, neon_u16, 2)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2NEON_VOP(max_s8, neon_s8, 4)NEON_VOP(max_u8, neon_u8, 4)NEON_VOP(max_s16, neon_s16, 2)NEON_VOP(max_u16, neon_u16, 2)NEON_VOP(max_s32, neon_s32, 1)NEON_VOP(max_u32, neon_u32, 1)NEON_POP(pmax_s8, neon_s8, 4)NEON_POP(pmax_u8, neon_u8, 4)NEON_POP(pmax_s16, neon_s16, 2)NEON_POP(pmax_u16, neon_u16, 2)#undef NEON_FN#define NEON_FN(dest, src1, src2) \ dest = (src1 > src2) ? 
(src1 - src2) : (src2 - src1)NEON_VOP(abd_s8, neon_s8, 4)NEON_VOP(abd_u8, neon_u8, 4)NEON_VOP(abd_s16, neon_s16, 2)NEON_VOP(abd_u16, neon_u16, 2)NEON_VOP(abd_s32, neon_s32, 1)NEON_VOP(abd_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ if (tmp >= sizeof(src1) * 8 || tmp <= -sizeof(src1) * 8) { \ dest = 0; \ } else if (tmp < 0) { \ dest = src1 >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0)NEON_VOP(shl_u8, neon_u8, 4)NEON_VOP(shl_u16, neon_u16, 2)NEON_VOP(shl_u32, neon_u32, 1)#undef NEON_FNuint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop){ int8_t shift = (int8_t)shiftop; if (shift >= 64 || shift <= -64) { val = 0; } else if (shift < 0) { val >>= -shift; } else { val <<= shift; } return val;}#define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ if (tmp >= sizeof(src1) * 8) { \ dest = 0; \ } else if (tmp <= -sizeof(src1) * 8) { \ dest = src1 >> (sizeof(src1) * 8 - 1); \ } else if (tmp < 0) { \ dest = src1 >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0)NEON_VOP(shl_s8, neon_s8, 4)NEON_VOP(shl_s16, neon_s16, 2)NEON_VOP(shl_s32, neon_s32, 1)#undef NEON_FNuint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop){ int8_t shift = (int8_t)shiftop; int64_t val = valop; if (shift >= 64) { val = 0; } else if (shift <= -64) { val >>= 63; } else if (shift < 0) { val >>= -shift; } else { val <<= shift; } return val;}#define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ if (tmp >= sizeof(src1) * 8) { \ dest = 0; \ } else if (tmp < -sizeof(src1) * 8) { \ dest >>= sizeof(src1) * 8 - 1; \ } else if (tmp == -sizeof(src1) * 8) { \ dest = src1 >> (tmp - 1); \ dest++; \ src2 >>= 1; \ } else if (tmp < 0) { \ dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0)NEON_VOP(rshl_s8, neon_s8, 4)NEON_VOP(rshl_s16, neon_s16, 2)NEON_VOP(rshl_s32, neon_s32, 1)#undef NEON_FNuint64_t HELPER(neon_rshl_s64)(uint64_t valop, 
uint64_t shiftop){ int8_t shift = (int8_t)shiftop; int64_t val = valop; if (shift >= 64) { val = 0; } else if (shift < -64) { val >>= 63; } else if (shift == -63) { val >>= 63; val++; val >>= 1; } else if (shift < 0) {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?