neon_helper.c

来自「xen虚拟机源代码安装包」· C语言 代码 · 共 1,458 行 · 第 1/3 页

C
1,458
字号
/* * ARM NEON vector operations. * * Copyright (c) 2007, 2008 CodeSourcery. * Written by Paul Brook * * This code is licenced under the GNU GPL v2. */#include <stdlib.h>#include <stdio.h>#include "cpu.h"#include "exec-all.h"#include "helpers.h"#define SIGNBIT (uint32_t)0x80000000#define SIGNBIT64 ((uint64_t)1 << 63)#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] = CPSR_Qstatic float_status neon_float_status;#define NFS &neon_float_status/* Helper routines to perform bitwise copies between float and int.  */static inline float32 vfp_itos(uint32_t i){    union {        uint32_t i;        float32 s;    } v;    v.i = i;    return v.s;}static inline uint32_t vfp_stoi(float32 s){    union {        uint32_t i;        float32 s;    } v;    v.s = s;    return v.i;}#define NEON_TYPE1(name, type) \typedef struct \{ \    type v1; \} neon_##name;#ifdef WORDS_BIGENDIAN#define NEON_TYPE2(name, type) \typedef struct \{ \    type v2; \    type v1; \} neon_##name;#define NEON_TYPE4(name, type) \typedef struct \{ \    type v4; \    type v3; \    type v2; \    type v1; \} neon_##name;#else#define NEON_TYPE2(name, type) \typedef struct \{ \    type v1; \    type v2; \} neon_##name;#define NEON_TYPE4(name, type) \typedef struct \{ \    type v1; \    type v2; \    type v3; \    type v4; \} neon_##name;#endifNEON_TYPE4(s8, int8_t)NEON_TYPE4(u8, uint8_t)NEON_TYPE2(s16, int16_t)NEON_TYPE2(u16, uint16_t)NEON_TYPE1(s32, int32_t)NEON_TYPE1(u32, uint32_t)#undef NEON_TYPE4#undef NEON_TYPE2#undef NEON_TYPE1/* Copy from a uint32_t to a vector structure type.  */#define NEON_UNPACK(vtype, dest, val) do { \    union { \        vtype v; \        uint32_t i; \    } conv_u; \    conv_u.i = (val); \    dest = conv_u.v; \    } while(0)/* Copy from a vector structure type to a uint32_t.  */#define NEON_PACK(vtype, dest, val) do { \    union { \        vtype v; \        uint32_t i; \    } conv_u; \    conv_u.v = (val); \    dest = conv_u.i; \    } while(0)#define NEON_DO1 \    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);#define NEON_DO2 \    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);#define NEON_DO4 \    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); \    NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \    NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);#define NEON_VOP_BODY(vtype, n) \{ \    uint32_t res; \    vtype vsrc1; \    vtype vsrc2; \    vtype vdest; \    NEON_UNPACK(vtype, vsrc1, arg1); \    NEON_UNPACK(vtype, vsrc2, arg2); \    NEON_DO##n; \    NEON_PACK(vtype, res, vdest); \    return res; \}#define NEON_VOP(name, vtype, n) \uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \NEON_VOP_BODY(vtype, n)#define NEON_VOP_ENV(name, vtype, n) \uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \NEON_VOP_BODY(vtype, n)/* Pairwise operations.  *//* For 32-bit elements each segment only contains a single element, so   the elementwise and pairwise operations are the same.  */#define NEON_PDO2 \    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \    NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);#define NEON_PDO4 \    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \    NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \    NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \    NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4); \#define NEON_POP(name, vtype, n) \uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \{ \    uint32_t res; \    vtype vsrc1; \    vtype vsrc2; \    vtype vdest; \    NEON_UNPACK(vtype, vsrc1, arg1); \    NEON_UNPACK(vtype, vsrc2, arg2); \    NEON_PDO##n; \    NEON_PACK(vtype, res, vdest); \    return res; \}/* Unary operators.  */#define NEON_VOP1(name, vtype, n) \uint32_t HELPER(glue(neon_,name))(uint32_t arg) \{ \    vtype vsrc1; \    vtype vdest; \    NEON_UNPACK(vtype, vsrc1, arg); \    NEON_DO##n; \    NEON_PACK(vtype, arg, vdest); \    return arg; \}#define NEON_USAT(dest, src1, src2, type) do { \    uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \    if (tmp != (type)tmp) { \        SET_QC(); \        dest = ~0; \    } else { \        dest = tmp; \    }} while(0)#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)NEON_VOP_ENV(qadd_u8, neon_u8, 4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)NEON_VOP_ENV(qadd_u16, neon_u16, 2)#undef NEON_FN#undef NEON_USAT#define NEON_SSAT(dest, src1, src2, type) do { \    int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \    if (tmp != (type)tmp) { \        SET_QC(); \        if (src2 > 0) { \            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \        } else { \            tmp = 1 << (sizeof(type) * 8 - 1); \        } \    } \    dest = tmp; \    } while(0)#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)NEON_VOP_ENV(qadd_s8, neon_s8, 4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)NEON_VOP_ENV(qadd_s16, neon_s16, 2)#undef NEON_FN#undef NEON_SSAT#define NEON_USAT(dest, src1, src2, type) do { \    uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \    if (tmp != (type)tmp) { \        SET_QC(); \        dest = 0; \    } else { \        dest = tmp; \    }} while(0)#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)NEON_VOP_ENV(qsub_u8, neon_u8, 4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)NEON_VOP_ENV(qsub_u16, neon_u16, 2)#undef NEON_FN#undef NEON_USAT#define NEON_SSAT(dest, src1, src2, type) do { \    int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \    if (tmp != (type)tmp) { \        SET_QC(); \        if (src2 < 0) { \            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \        } else { \            tmp = 1 << (sizeof(type) * 8 - 1); \        } \    } \    dest = tmp; \    } while(0)#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)NEON_VOP_ENV(qsub_s8, neon_s8, 4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)NEON_VOP_ENV(qsub_s16, neon_s16, 2)#undef NEON_FN#undef NEON_SSAT#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1NEON_VOP(hadd_s8, neon_s8, 4)NEON_VOP(hadd_u8, neon_u8, 4)NEON_VOP(hadd_s16, neon_s16, 2)NEON_VOP(hadd_u16, neon_u16, 2)#undef NEON_FNint32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2){    int32_t dest;    dest = (src1 >> 1) + (src2 >> 1);    if (src1 & src2 & 1)        dest++;    return dest;}uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2){    uint32_t dest;    dest = (src1 >> 1) + (src2 >> 1);    if (src1 & src2 & 1)        dest++;    return dest;}#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1NEON_VOP(rhadd_s8, neon_s8, 4)NEON_VOP(rhadd_u8, neon_u8, 4)NEON_VOP(rhadd_s16, neon_s16, 2)NEON_VOP(rhadd_u16, neon_u16, 2)#undef NEON_FNint32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2){    int32_t dest;    dest = (src1 >> 1) + (src2 >> 1);    if ((src1 | src2) & 1)        dest++;    return dest;}uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2){    uint32_t dest;    dest = (src1 >> 1) + (src2 >> 1);    if ((src1 | src2) & 1)        dest++;    return dest;}#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1NEON_VOP(hsub_s8, neon_s8, 4)NEON_VOP(hsub_u8, neon_u8, 4)NEON_VOP(hsub_s16, neon_s16, 2)NEON_VOP(hsub_u16, neon_u16, 2)#undef NEON_FNint32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2){    int32_t dest;    dest = (src1 >> 1) - (src2 >> 1);    if ((~src1) & src2 & 1)        dest--;    return dest;}uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2){    uint32_t dest;    dest = (src1 >> 1) - (src2 >> 1);    if ((~src1) & src2 & 1)        dest--;    return dest;}#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0NEON_VOP(cgt_s8, neon_s8, 4)NEON_VOP(cgt_u8, neon_u8, 4)NEON_VOP(cgt_s16, neon_s16, 2)NEON_VOP(cgt_u16, neon_u16, 2)NEON_VOP(cgt_s32, neon_s32, 1)NEON_VOP(cgt_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0NEON_VOP(cge_s8, neon_s8, 4)NEON_VOP(cge_u8, neon_u8, 4)NEON_VOP(cge_s16, neon_s16, 2)NEON_VOP(cge_u16, neon_u16, 2)NEON_VOP(cge_s32, neon_s32, 1)NEON_VOP(cge_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2NEON_VOP(min_s8, neon_s8, 4)NEON_VOP(min_u8, neon_u8, 4)NEON_VOP(min_s16, neon_s16, 2)NEON_VOP(min_u16, neon_u16, 2)NEON_VOP(min_s32, neon_s32, 1)NEON_VOP(min_u32, neon_u32, 1)NEON_POP(pmin_s8, neon_s8, 4)NEON_POP(pmin_u8, neon_u8, 4)NEON_POP(pmin_s16, neon_s16, 2)NEON_POP(pmin_u16, neon_u16, 2)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2NEON_VOP(max_s8, neon_s8, 4)NEON_VOP(max_u8, neon_u8, 4)NEON_VOP(max_s16, neon_s16, 2)NEON_VOP(max_u16, neon_u16, 2)NEON_VOP(max_s32, neon_s32, 1)NEON_VOP(max_u32, neon_u32, 1)NEON_POP(pmax_s8, neon_s8, 4)NEON_POP(pmax_u8, neon_u8, 4)NEON_POP(pmax_s16, neon_s16, 2)NEON_POP(pmax_u16, neon_u16, 2)#undef NEON_FN#define NEON_FN(dest, src1, src2) \    dest = (src1 > src2) ? (src1 - src2) : (src2 - src1)NEON_VOP(abd_s8, neon_s8, 4)NEON_VOP(abd_u8, neon_u8, 4)NEON_VOP(abd_s16, neon_s16, 2)NEON_VOP(abd_u16, neon_u16, 2)NEON_VOP(abd_s32, neon_s32, 1)NEON_VOP(abd_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src2; \    if (tmp >= sizeof(src1) * 8 || tmp <= -sizeof(src1) * 8) { \        dest = 0; \    } else if (tmp < 0) { \        dest = src1 >> -tmp; \    } else { \        dest = src1 << tmp; \    }} while (0)NEON_VOP(shl_u8, neon_u8, 4)NEON_VOP(shl_u16, neon_u16, 2)NEON_VOP(shl_u32, neon_u32, 1)#undef NEON_FNuint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop){    int8_t shift = (int8_t)shiftop;    if (shift >= 64 || shift <= -64) {        val = 0;    } else if (shift < 0) {        val >>= -shift;    } else {        val <<= shift;    }    return val;}#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src2; \    if (tmp >= sizeof(src1) * 8) { \        dest = 0; \    } else if (tmp <= -sizeof(src1) * 8) { \        dest = src1 >> (sizeof(src1) * 8 - 1); \    } else if (tmp < 0) { \        dest = src1 >> -tmp; \    } else { \        dest = src1 << tmp; \    }} while (0)NEON_VOP(shl_s8, neon_s8, 4)NEON_VOP(shl_s16, neon_s16, 2)NEON_VOP(shl_s32, neon_s32, 1)#undef NEON_FNuint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop){    int8_t shift = (int8_t)shiftop;    int64_t val = valop;    if (shift >= 64) {        val = 0;    } else if (shift <= -64) {        val >>= 63;    } else if (shift < 0) {        val >>= -shift;    } else {        val <<= shift;    }    return val;}#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src2; \    if (tmp >= sizeof(src1) * 8) { \        dest = 0; \    } else if (tmp < -sizeof(src1) * 8) { \        dest >>= sizeof(src1) * 8 - 1; \    } else if (tmp == -sizeof(src1) * 8) { \        dest = src1 >> (tmp - 1); \        dest++; \        src2 >>= 1; \    } else if (tmp < 0) { \        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \    } else { \        dest = src1 << tmp; \    }} while (0)NEON_VOP(rshl_s8, neon_s8, 4)NEON_VOP(rshl_s16, neon_s16, 2)NEON_VOP(rshl_s32, neon_s32, 1)#undef NEON_FNuint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop){    int8_t shift = (int8_t)shiftop;    int64_t val = valop;    if (shift >= 64) {        val = 0;    } else if (shift < -64) {        val >>= 63;    } else if (shift == -63) {        val >>= 63;        val++;        val >>= 1;    } else if (shift < 0) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?