📄 qpel.c
字号:
/*
 * This code is optimized for SH, which has post-increment addressing (*p++).
 * On some other CPUs indexed addressing (p[n]) may be faster than
 * post-increment (*p++).
 */

/*
 * Load one uint32_t from adr through a pointer cast.
 * NOTE(review): the expansion is not wrapped in parentheses; it is kept
 * token-identical here because its call sites have not been audited.
 */
#define LD(adr) *(uint32_t*)(adr)

/*
 * PIXOP2(OPNAME, OP) expands to a family of static inline helpers named
 * OPNAME ## _[no_rnd_]pixels{4,8,16}_l{2,4}_aligned*.
 *
 * Each helper walks h rows. Within a row it works on 32-bit words
 * (1, 2 or 4 of them for the pixels4 / pixels8 / pixels16 names):
 *   - _l2 helpers combine src1 and src2 with rnd_avg32(), or with
 *     no_rnd_avg32() for the _no_rnd_ names;
 *   - _l4 helpers combine src1..src4 with UNPACK() followed by
 *     rnd_PACK(), or no_rnd_PACK() for the _no_rnd_ names.
 * The combined word is handed to OP(LP(dst + k), value), so OP decides how
 * it is merged into the destination. After every row each pointer is
 * advanced by its own stride.
 *
 * Suffixes, as the bodies show them:
 *   _aligned   every source is read through LP()
 *   _aligned2  src1 is read through LD32(), src2 through LP()
 *   _aligned1  forwards to _aligned2 with src1/src2 and their strides swapped
 *   _aligned0  (l4 only) src1 is read through LD32(), src2..src4 through LP()
 * The original comments state that LD32() is used for the operand that is
 * not aligned.
 *
 * All loops are do { ... } while (--h), so h must be at least 1.
 */
#define PIXOP2(OPNAME, OP) \
/* Disabled variants that read both sources through LD32(): \
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),no_rnd_avg32(LD32(src1  ),LD32(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LD32(src1  ),LD32(src2  )) ); \
        OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LD32(src1  ),LD32(src2  )) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst   ),no_rnd_avg32(LD32(src1   ),LD32(src2   )) ); \
        OP(LP(dst+4 ),no_rnd_avg32(LD32(src1+4 ),LD32(src2+4 )) ); \
        OP(LP(dst+8 ),no_rnd_avg32(LD32(src1+8 ),LD32(src2+8 )) ); \
        OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst   ),rnd_avg32(LD32(src1   ),LD32(src2   )) ); \
        OP(LP(dst+4 ),rnd_avg32(LD32(src1+4 ),LD32(src2+4 )) ); \
        OP(LP(dst+8 ),rnd_avg32(LD32(src1+8 ),LD32(src2+8 )) ); \
        OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}*/\
\
/* ---- two-source helpers, 4 pixels wide ---- */\
static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LP(src1  ),LP(src2  )) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LD32(src1  ),LP(src2  )) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
/* ---- two-source helpers, src1 via LD32(), src2 via LP() ---- */\
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst   ),no_rnd_avg32(LD32(src1   ),LP(src2   )) ); \
        OP(LP(dst+4 ),no_rnd_avg32(LD32(src1+4 ),LP(src2+4 )) ); \
        OP(LP(dst+8 ),no_rnd_avg32(LD32(src1+8 ),LP(src2+8 )) ); \
        OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst   ),rnd_avg32(LD32(src1   ),LP(src2   )) ); \
        OP(LP(dst+4 ),rnd_avg32(LD32(src1+4 ),LP(src2+4 )) ); \
        OP(LP(dst+8 ),rnd_avg32(LD32(src1+8 ),LP(src2+8 )) ); \
        OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do { /* only src2 is aligned */\
        OP(LP(dst  ),no_rnd_avg32(LD32(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LD32(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
/* ---- two-source helpers, both sources via LP() ---- */\
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),no_rnd_avg32(LP(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LP(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst   ),no_rnd_avg32(LP(src1   ),LP(src2   )) ); \
        OP(LP(dst+4 ),no_rnd_avg32(LP(src1+4 ),LP(src2+4 )) ); \
        OP(LP(dst+8 ),no_rnd_avg32(LP(src1+8 ),LP(src2+8 )) ); \
        OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst   ),rnd_avg32(LP(src1   ),LP(src2   )) ); \
        OP(LP(dst+4 ),rnd_avg32(LP(src1+4 ),LP(src2+4 )) ); \
        OP(LP(dst+8 ),rnd_avg32(LP(src1+8 ),LP(src2+8 )) ); \
        OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
/* ---- _aligned1: same as _aligned2 with the two sources (and strides) swapped ---- */\
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
/* ---- four-source helpers, 8 pixels wide ---- */\
static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; /* only src1 is not aligned */\
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
/* ---- four-source helpers, 16 pixels wide: one word each at offsets 0, 4, 8, 12 ---- */\
static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); /* word from source offset 4 belongs at dst+4 */\
        UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
        UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
        OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
        UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
        OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
        UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
        OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
        UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
        OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    do { /* src1 is unaligned */\
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -