neon_helper.c

来自「xen虚拟机源代码安装包」· C语言 代码 · 共 1,458 行 · 第 1/3 页

C
1,458
字号
        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;    } else {        val <<= shift;    }    return val;}#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src2; \    if (tmp >= sizeof(src1) * 8 || tmp < -sizeof(src1) * 8) { \        dest = 0; \    } else if (tmp == -sizeof(src1) * 8) { \        dest = src1 >> (tmp - 1); \    } else if (tmp < 0) { \        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \    } else { \        dest = src1 << tmp; \    }} while (0)NEON_VOP(rshl_u8, neon_u8, 4)NEON_VOP(rshl_u16, neon_u16, 2)NEON_VOP(rshl_u32, neon_u32, 1)#undef NEON_FNuint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop){    int8_t shift = (uint8_t)shiftop;    if (shift >= 64 || shift < 64) {        val = 0;    } else if (shift == -64) {        /* Rounding a 1-bit result just preserves that bit.  */        val >>= 63;    } if (shift < 0) {        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;        val >>= -shift;    } else {        val <<= shift;    }    return val;}#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src2; \    if (tmp >= sizeof(src1) * 8) { \        if (src1) { \            SET_QC(); \            dest = ~0; \        } else { \            dest = 0; \        } \    } else if (tmp <= -sizeof(src1) * 8) { \        dest = 0; \    } else if (tmp < 0) { \        dest = src1 >> -tmp; \    } else { \        dest = src1 << tmp; \        if ((dest >> tmp) != src1) { \            SET_QC(); \            dest = ~0; \        } \    }} while (0)NEON_VOP_ENV(qshl_u8, neon_u8, 4)NEON_VOP_ENV(qshl_u16, neon_u16, 2)NEON_VOP_ENV(qshl_u32, neon_u32, 1)#undef NEON_FNuint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop){    int8_t shift = (int8_t)shiftop;    if (shift >= 64) {        if (val) {            val = ~(uint64_t)0;            SET_QC();        } else {            val = 0;        }    } else if (shift <= -64) {        val = 0;    } else if (shift < 0) {        val >>= -shift;    } else {        uint64_t tmp = val;        val <<= shift;        if ((val >> shift) != tmp) {            SET_QC();            val = ~(uint64_t)0;        }    }    return val;}#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src2; \    if (tmp >= sizeof(src1) * 8) { \        if (src1) \            SET_QC(); \        dest = src1 >> 31; \    } else if (tmp <= -sizeof(src1) * 8) { \        dest = src1 >> 31; \    } else if (tmp < 0) { \        dest = src1 >> -tmp; \    } else { \        dest = src1 << tmp; \        if ((dest >> tmp) != src1) { \            SET_QC(); \            dest = src2 >> 31; \        } \    }} while (0)NEON_VOP_ENV(qshl_s8, neon_s8, 4)NEON_VOP_ENV(qshl_s16, neon_s16, 2)NEON_VOP_ENV(qshl_s32, neon_s32, 1)#undef NEON_FNuint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop){    int8_t shift = (uint8_t)shiftop;    int64_t val = valop;    if (shift >= 64) {        if (val) {            SET_QC();            val = (val >> 63) & ~SIGNBIT64;        }    } else if (shift <= 64) {        val >>= 63;    } else if (shift < 0) {        val >>= -shift;    } else {        int64_t tmp = val;        val <<= shift;        if ((val >> shift) != tmp) {            SET_QC();            val = (tmp >> 63) ^ ~SIGNBIT64;        }    }    return val;}/* FIXME: This is wrong.  */#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src2; \    if (tmp < 0) { \        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \    } else { \        dest = src1 << tmp; \        if ((dest >> tmp) != src1) { \            SET_QC(); \            dest = ~0; \        } \    }} while (0)NEON_VOP_ENV(qrshl_u8, neon_u8, 4)NEON_VOP_ENV(qrshl_u16, neon_u16, 2)NEON_VOP_ENV(qrshl_u32, neon_u32, 1)#undef NEON_FNuint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop){    int8_t shift = (int8_t)shiftop;    if (shift < 0) {        val = (val + (1 << (-1 - shift))) >> -shift;    } else { \        uint64_t tmp = val;        val <<= shift;        if ((val >> shift) != tmp) {            SET_QC();            val = ~0;        }    }    return val;}#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src2; \    if (tmp < 0) { \        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \    } else { \        dest = src1 << tmp; \        if ((dest >> tmp) != src1) { \            SET_QC(); \            dest = src1 >> 31; \        } \    }} while (0)NEON_VOP_ENV(qrshl_s8, neon_s8, 4)NEON_VOP_ENV(qrshl_s16, neon_s16, 2)NEON_VOP_ENV(qrshl_s32, neon_s32, 1)#undef NEON_FNuint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop){    int8_t shift = (uint8_t)shiftop;    int64_t val = valop;    if (shift < 0) {        val = (val + (1 << (-1 - shift))) >> -shift;    } else {        int64_t tmp = val;;        val <<= shift;        if ((val >> shift) != tmp) {            SET_QC();            val = tmp >> 31;        }    }    return val;}uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b){    uint32_t mask;    mask = (a ^ b) & 0x80808080u;    a &= ~0x80808080u;    b &= ~0x80808080u;    return (a + b) ^ mask;}uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b){    uint32_t mask;    mask = (a ^ b) & 0x80008000u;    a &= ~0x80008000u;    b &= ~0x80008000u;    return (a + b) ^ mask;}#define NEON_FN(dest, src1, src2) dest = src1 + src2NEON_POP(padd_u8, neon_u8, 4)NEON_POP(padd_u16, neon_u16, 2)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = src1 - src2NEON_VOP(sub_u8, neon_u8, 4)NEON_VOP(sub_u16, neon_u16, 2)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = src1 * src2NEON_VOP(mul_u8, neon_u8, 4)NEON_VOP(mul_u16, neon_u16, 2)#undef NEON_FN/* Polynomial multiplication is like integer multiplication except the   partial products are XORed, not added.  */uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2){    uint32_t mask;    uint32_t result;    result = 0;    while (op1) {        mask = 0;        if (op1 & 1)            mask |= 0xff;        if (op1 & (1 << 8))            mask |= (0xff << 8);        if (op1 & (1 << 16))            mask |= (0xff << 16);        if (op1 & (1 << 24))            mask |= (0xff << 24);        result ^= op2 & mask;        op1 = (op1 >> 1) & 0x7f7f7f7f;        op2 = (op2 << 1) & 0xfefefefe;    }    return result;}#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0NEON_VOP(tst_u8, neon_u8, 4)NEON_VOP(tst_u16, neon_u16, 2)NEON_VOP(tst_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0NEON_VOP(ceq_u8, neon_u8, 4)NEON_VOP(ceq_u16, neon_u16, 2)NEON_VOP(ceq_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : srcNEON_VOP1(abs_s8, neon_s8, 4)NEON_VOP1(abs_s16, neon_s16, 2)#undef NEON_FN/* Count Leading Sign/Zero Bits.  */static inline int do_clz8(uint8_t x){    int n;    for (n = 8; x; n--)        x >>= 1;    return n;}static inline int do_clz16(uint16_t x){    int n;    for (n = 16; x; n--)        x >>= 1;    return n;}#define NEON_FN(dest, src, dummy) dest = do_clz8(src)NEON_VOP1(clz_u8, neon_u8, 4)#undef NEON_FN#define NEON_FN(dest, src, dummy) dest = do_clz16(src)NEON_VOP1(clz_u16, neon_u16, 2)#undef NEON_FN#define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1NEON_VOP1(cls_s8, neon_s8, 4)#undef NEON_FN#define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1NEON_VOP1(cls_s16, neon_s16, 2)#undef NEON_FNuint32_t HELPER(neon_cls_s32)(uint32_t x){    int count;    if ((int32_t)x < 0)        x = ~x;    for (count = 32; x; count--)        x = x >> 1;    return count - 1;}/* Bit count.  */uint32_t HELPER(neon_cnt_u8)(uint32_t x){    x = (x & 0x55555555) + ((x >>  1) & 0x55555555);    x = (x & 0x33333333) + ((x >>  2) & 0x33333333);    x = (x & 0x0f0f0f0f) + ((x >>  4) & 0x0f0f0f0f);    return x;}#define NEON_QDMULH16(dest, src1, src2, round) do { \    uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \    if ((tmp ^ (tmp << 1)) & SIGNBIT) { \        SET_QC(); \        tmp = (tmp >> 31) ^ ~SIGNBIT; \    } \    tmp <<= 1; \    if (round) { \        int32_t old = tmp; \        tmp += 1 << 15; \        if ((int32_t)tmp < old) { \            SET_QC(); \            tmp = SIGNBIT - 1; \        } \    } \    dest = tmp >> 16; \    } while(0)#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0)NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1)NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)#undef NEON_FN#undef NEON_QDMULH16#define NEON_QDMULH32(dest, src1, src2, round) do { \    uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \    if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \        SET_QC(); \        tmp = (tmp >> 63) ^ ~SIGNBIT64; \    } else { \        tmp <<= 1; \    } \    if (round) { \        int64_t old = tmp; \        tmp += (int64_t)1 << 31; \        if ((int64_t)tmp < old) { \            SET_QC(); \            tmp = SIGNBIT64 - 1; \        } \    } \    dest = tmp >> 32; \    } while(0)#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0)NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1)NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)#undef NEON_FN#undef NEON_QDMULH32uint32_t HELPER(neon_narrow_u8)(uint64_t x){    return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u)           | ((x >> 24) & 0xff000000u);}uint32_t HELPER(neon_narrow_u16)(uint64_t x){    return (x & 0xffffu) | ((x >> 16) & 0xffff0000u);}uint32_t HELPER(neon_narrow_high_u8)(uint64_t x){    return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)            | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);}uint32_t HELPER(neon_narrow_high_u16)(uint64_t x){    return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);}uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x){    x &= 0xff80ff80ff80ff80ull;    x += 0x0080008000800080ull;    return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)            | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);}uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x){    x &= 0xffff8000ffff8000ull;    x += 0x0000800000008000ull;    return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);}uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x){    uint16_t s;    uint8_t d;    uint32_t res = 0;#define SAT8(n) \    s = x >> n; \    if (s > 0xff) { \        d = 0xff; \        SET_QC(); \    } else  { \        d = s; \    } \    res |= (uint32_t)d << (n / 2);    SAT8(0);    SAT8(16);    SAT8(32);    SAT8(48);#undef SAT8    return res;}uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x){    int16_t s;    uint8_t d;    uint32_t res = 0;#define SAT8(n) \    s = x >> n; \    if (s != (int8_t)s) { \        d = (s >> 15) ^ 0x7f; \        SET_QC(); \    } else  { \        d = s; \    } \    res |= (uint32_t)d << (n / 2);    SAT8(0);    SAT8(16);    SAT8(32);    SAT8(48);#undef SAT8    return res;}uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x){    uint32_t high;    uint32_t low;    low = x;    if (low > 0xffff) {        low = 0xffff;        SET_QC();    }    high = x >> 32;    if (high > 0xffff) {        high = 0xffff;        SET_QC();    }    return low | (high << 16);}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?