⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 op_neon.h.svn-base

📁 我们自己开发的一个OSEK操作系统!不知道可不可以?
💻 SVN-BASE
📖 第 1 页 / 共 3 页
字号:
/* * ARM NEON vector operations. * * Copyright (c) 2007 CodeSourcery. * Written by Paul Brook * * This code is licenced under the GPL. *//* Note that for NEON an "l" prefix means it is a wide operation, unlike   scalar arm ops where it means a word size operation.  *//* ??? NEON ops should probably have their own float status.  */#define NFS &env->vfp.fp_status#define NEON_OP(name) void OPPROTO op_neon_##name (void)NEON_OP(getreg_T0){    T0 = *(uint32_t *)((char *) env + PARAM1);}NEON_OP(getreg_T1){    T1 = *(uint32_t *)((char *) env + PARAM1);}NEON_OP(getreg_T2){    T2 = *(uint32_t *)((char *) env + PARAM1);}NEON_OP(setreg_T0){    *(uint32_t *)((char *) env + PARAM1) = T0;}NEON_OP(setreg_T1){    *(uint32_t *)((char *) env + PARAM1) = T1;}NEON_OP(setreg_T2){    *(uint32_t *)((char *) env + PARAM1) = T2;}#define NEON_TYPE1(name, type) \typedef struct \{ \    type v1; \} neon_##name;#ifdef WORDS_BIGENDIAN#define NEON_TYPE2(name, type) \typedef struct \{ \    type v2; \    type v1; \} neon_##name;#define NEON_TYPE4(name, type) \typedef struct \{ \    type v4; \    type v3; \    type v2; \    type v1; \} neon_##name;#else#define NEON_TYPE2(name, type) \typedef struct \{ \    type v1; \    type v2; \} neon_##name;#define NEON_TYPE4(name, type) \typedef struct \{ \    type v1; \    type v2; \    type v3; \    type v4; \} neon_##name;#endifNEON_TYPE4(s8, int8_t)NEON_TYPE4(u8, uint8_t)NEON_TYPE2(s16, int16_t)NEON_TYPE2(u16, uint16_t)NEON_TYPE1(s32, int32_t)NEON_TYPE1(u32, uint32_t)#undef NEON_TYPE4#undef NEON_TYPE2#undef NEON_TYPE1/* Copy from a uint32_t to a vector structure type.  */#define NEON_UNPACK(vtype, dest, val) do { \    union { \        vtype v; \        uint32_t i; \    } conv_u; \    conv_u.i = (val); \    dest = conv_u.v; \    } while(0)/* Copy from a vector structure type to a uint32_t.  */#define NEON_PACK(vtype, dest, val) do { \    union { \        vtype v; \        uint32_t i; \    } conv_u; \    conv_u.v = (val); \    dest = conv_u.i; \    } while(0)#define NEON_DO1 \    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);#define NEON_DO2 \    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);#define NEON_DO4 \    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); \    NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \    NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);#define NEON_VOP(name, vtype, n) \NEON_OP(name) \{ \    vtype vsrc1; \    vtype vsrc2; \    vtype vdest; \    NEON_UNPACK(vtype, vsrc1, T0); \    NEON_UNPACK(vtype, vsrc2, T1); \    NEON_DO##n; \    NEON_PACK(vtype, T0, vdest); \    FORCE_RET(); \}#define NEON_VOP1(name, vtype, n) \NEON_OP(name) \{ \    vtype vsrc1; \    vtype vdest; \    NEON_UNPACK(vtype, vsrc1, T0); \    NEON_DO##n; \    NEON_PACK(vtype, T0, vdest); \    FORCE_RET(); \}/* Pairwise operations.  *//* For 32-bit elements each segment only contains a single element, so   the elementwise and pairwise operations are the same.  */#define NEON_PDO2 \    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \    NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);#define NEON_PDO4 \    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \    NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \    NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \    NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4); \#define NEON_POP(name, vtype, n) \NEON_OP(name) \{ \    vtype vsrc1; \    vtype vsrc2; \    vtype vdest; \    NEON_UNPACK(vtype, vsrc1, T0); \    NEON_UNPACK(vtype, vsrc2, T1); \    NEON_PDO##n; \    NEON_PACK(vtype, T0, vdest); \    FORCE_RET(); \}#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1NEON_VOP(hadd_s8, neon_s8, 4)NEON_VOP(hadd_u8, neon_u8, 4)NEON_VOP(hadd_s16, neon_s16, 2)NEON_VOP(hadd_u16, neon_u16, 2)#undef NEON_FNNEON_OP(hadd_s32){    int32_t src1 = T0;    int32_t src2 = T1;    int32_t dest;    dest = (src1 >> 1) + (src2 >> 1);    if (src1 & src2 & 1)        dest++;    T0 = dest;    FORCE_RET();}NEON_OP(hadd_u32){    uint32_t src1 = T0;    uint32_t src2 = T1;    uint32_t dest;    dest = (src1 >> 1) + (src2 >> 1);    if (src1 & src2 & 1)        dest++;    T0 = dest;    FORCE_RET();}#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1NEON_VOP(rhadd_s8, neon_s8, 4)NEON_VOP(rhadd_u8, neon_u8, 4)NEON_VOP(rhadd_s16, neon_s16, 2)NEON_VOP(rhadd_u16, neon_u16, 2)#undef NEON_FNNEON_OP(rhadd_s32){    int32_t src1 = T0;    int32_t src2 = T1;    int32_t dest;    dest = (src1 >> 1) + (src2 >> 1);    if ((src1 | src2) & 1)        dest++;    T0 = dest;    FORCE_RET();}NEON_OP(rhadd_u32){    uint32_t src1 = T0;    uint32_t src2 = T1;    uint32_t dest;    dest = (src1 >> 1) + (src2 >> 1);    if ((src1 | src2) & 1)        dest++;    T0 = dest;    FORCE_RET();}#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1NEON_VOP(hsub_s8, neon_s8, 4)NEON_VOP(hsub_u8, neon_u8, 4)NEON_VOP(hsub_s16, neon_s16, 2)NEON_VOP(hsub_u16, neon_u16, 2)#undef NEON_FNNEON_OP(hsub_s32){    int32_t src1 = T0;    int32_t src2 = T1;    int32_t dest;    dest = (src1 >> 1) - (src2 >> 1);    if ((~src1) & src2 & 1)        dest--;    T0 = dest;    FORCE_RET();}NEON_OP(hsub_u32){    uint32_t src1 = T0;    uint32_t src2 = T1;    uint32_t dest;    dest = (src1 >> 1) - (src2 >> 1);    if ((~src1) & src2 & 1)        dest--;    T0 = dest;    FORCE_RET();}/* ??? bsl, bif and bit are all the same op, just with the oparands in a   differnet order.  It's currently easier to have 3 differnt ops than   rearange the operands.  *//* Bitwise Select.  */NEON_OP(bsl){    T0 = (T0 & T2) | (T1 & ~T2);}/* Bitwise Insert If True.  */NEON_OP(bit){    T0 = (T0 & T1) | (T2 & ~T1);}/* Bitwise Insert If False.  */NEON_OP(bif){    T0 = (T2 & T1) | (T0 & ~T1);}#define NEON_USAT(dest, src1, src2, type) do { \    uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \    if (tmp != (type)tmp) { \        env->QF = 1; \        dest = ~0; \    } else { \        dest = tmp; \    }} while(0)#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)NEON_VOP(qadd_u8, neon_u8, 4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)NEON_VOP(qadd_u16, neon_u16, 2)#undef NEON_FN#undef NEON_USAT#define NEON_SSAT(dest, src1, src2, type) do { \    int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \    if (tmp != (type)tmp) { \        env->QF = 1; \        if (src2 > 0) { \            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \        } else { \            tmp = 1 << (sizeof(type) * 8 - 1); \        } \    } \    dest = tmp; \    } while(0)#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)NEON_VOP(qadd_s8, neon_s8, 4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)NEON_VOP(qadd_s16, neon_s16, 2)#undef NEON_FN#undef NEON_SSAT#define NEON_USAT(dest, src1, src2, type) do { \    uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \    if (tmp != (type)tmp) { \        env->QF = 1; \        dest = 0; \    } else { \        dest = tmp; \    }} while(0)#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)NEON_VOP(qsub_u8, neon_u8, 4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)NEON_VOP(qsub_u16, neon_u16, 2)#undef NEON_FN#undef NEON_USAT#define NEON_SSAT(dest, src1, src2, type) do { \    int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \    if (tmp != (type)tmp) { \        env->QF = 1; \        if (src2 < 0) { \            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \        } else { \            tmp = 1 << (sizeof(type) * 8 - 1); \        } \    } \    dest = tmp; \    } while(0)#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)NEON_VOP(qsub_s8, neon_s8, 4)#undef NEON_FN#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)NEON_VOP(qsub_s16, neon_s16, 2)#undef NEON_FN#undef NEON_SSAT#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0NEON_VOP(cgt_s8, neon_s8, 4)NEON_VOP(cgt_u8, neon_u8, 4)NEON_VOP(cgt_s16, neon_s16, 2)NEON_VOP(cgt_u16, neon_u16, 2)NEON_VOP(cgt_s32, neon_s32, 1)NEON_VOP(cgt_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0NEON_VOP(cge_s8, neon_s8, 4)NEON_VOP(cge_u8, neon_u8, 4)NEON_VOP(cge_s16, neon_s16, 2)NEON_VOP(cge_u16, neon_u16, 2)NEON_VOP(cge_s32, neon_s32, 1)NEON_VOP(cge_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src2; \    if (tmp < 0) { \        dest = src1 >> -tmp; \    } else { \        dest = src1 << tmp; \    }} while (0)NEON_VOP(shl_s8, neon_s8, 4)NEON_VOP(shl_u8, neon_u8, 4)NEON_VOP(shl_s16, neon_s16, 2)NEON_VOP(shl_u16, neon_u16, 2)NEON_VOP(shl_s32, neon_s32, 1)NEON_VOP(shl_u32, neon_u32, 1)#undef NEON_FNNEON_OP(shl_u64){    int8_t shift = T2;    uint64_t val = T0 | ((uint64_t)T1 << 32);    if (shift < 0) {        val >>= -shift;    } else {        val <<= shift;    }    T0 = val;    T1 = val >> 32;    FORCE_RET();}NEON_OP(shl_s64){    int8_t shift = T2;    int64_t val = T0 | ((uint64_t)T1 << 32);    if (shift < 0) {        val >>= -shift;    } else {        val <<= shift;    }    T0 = val;    T1 = val >> 32;    FORCE_RET();}#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src1; \    if (tmp < 0) { \        dest = (src2 + (1 << (-1 - tmp))) >> -tmp; \    } else { \        dest = src2 << tmp; \    }} while (0)NEON_VOP(rshl_s8, neon_s8, 4)NEON_VOP(rshl_u8, neon_u8, 4)NEON_VOP(rshl_s16, neon_s16, 2)NEON_VOP(rshl_u16, neon_u16, 2)NEON_VOP(rshl_s32, neon_s32, 1)NEON_VOP(rshl_u32, neon_u32, 1)#undef NEON_FNNEON_OP(rshl_u64){    int8_t shift = T2;    uint64_t val = T0 | ((uint64_t)T1 << 32);    if (shift < 0) {        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;        val >>= -shift;    } else {        val <<= shift;    }    T0 = val;    T1 = val >> 32;    FORCE_RET();}NEON_OP(rshl_s64){    int8_t shift = T2;    int64_t val = T0 | ((uint64_t)T1 << 32);    if (shift < 0) {        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;    } else {        val <<= shift;    }    T0 = val;    T1 = val >> 32;    FORCE_RET();}#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src1; \    if (tmp < 0) { \        dest = src2 >> -tmp; \    } else { \        dest = src2 << tmp; \        if ((dest >> tmp) != src2) { \            env->QF = 1; \            dest = ~0; \        } \    }} while (0)NEON_VOP(qshl_s8, neon_s8, 4)NEON_VOP(qshl_s16, neon_s16, 2)NEON_VOP(qshl_s32, neon_s32, 1)#undef NEON_FNNEON_OP(qshl_s64){    int8_t shift = T2;    int64_t val = T0 | ((uint64_t)T1 << 32);    if (shift < 0) {        val >>= -shift;    } else {        int64_t tmp = val;        val <<= shift;        if ((val >> shift) != tmp) {            env->QF = 1;            val = (tmp >> 63) ^ 0x7fffffffffffffffULL;        }    }    T0 = val;    T1 = val >> 32;    FORCE_RET();}#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src1; \    if (tmp < 0) { \        dest = src2 >> -tmp; \    } else { \        dest = src2 << tmp; \        if ((dest >> tmp) != src2) { \            env->QF = 1; \            dest = src2 >> 31; \        } \    }} while (0)NEON_VOP(qshl_u8, neon_u8, 4)NEON_VOP(qshl_u16, neon_u16, 2)NEON_VOP(qshl_u32, neon_u32, 1)#undef NEON_FNNEON_OP(qshl_u64){    int8_t shift = T2;    uint64_t val = T0 | ((uint64_t)T1 << 32);    if (shift < 0) {        val >>= -shift;    } else {        uint64_t tmp = val;        val <<= shift;        if ((val >> shift) != tmp) {            env->QF = 1;            val = ~(uint64_t)0;        }    }    T0 = val;    T1 = val >> 32;    FORCE_RET();}#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src1; \    if (tmp < 0) { \        dest = (src2 + (1 << (-1 - tmp))) >> -tmp; \    } else { \        dest = src2 << tmp; \        if ((dest >> tmp) != src2) { \            dest = ~0; \        } \    }} while (0)NEON_VOP(qrshl_s8, neon_s8, 4)NEON_VOP(qrshl_s16, neon_s16, 2)NEON_VOP(qrshl_s32, neon_s32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) do { \    int8_t tmp; \    tmp = (int8_t)src1; \    if (tmp < 0) { \        dest = (src2 + (1 << (-1 - tmp))) >> -tmp; \    } else { \        dest = src2 << tmp; \        if ((dest >> tmp) != src2) { \            env->QF = 1; \            dest = src2 >> 31; \        } \    }} while (0)NEON_VOP(qrshl_u8, neon_u8, 4)NEON_VOP(qrshl_u16, neon_u16, 2)NEON_VOP(qrshl_u32, neon_u32, 1)#undef NEON_FN#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2NEON_VOP(max_s8, neon_s8, 4)NEON_VOP(max_u8, neon_u8, 4)NEON_VOP(max_s16, neon_s16, 2)NEON_VOP(max_u16, neon_u16, 2)NEON_VOP(max_s32, neon_s32, 1)NEON_VOP(max_u32, neon_u32, 1)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -