📄 qpel.c
字号:
}}
/**
 * Copy a block that is 9 bytes wide and h rows tall from src to dst.
 *
 * Every row is transferred as two 4-byte chunks through the LD32/ST32
 * helpers (defined elsewhere; presumably 32-bit load/store) followed by
 * the single remaining byte at offset 8. After each row the destination
 * advances by dstStride and the source by srcStride.
 *
 * Nothing is copied when h is zero or negative.
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row = 0;

    while (row < h) {
        ST32(dst,     LD32(src));
        ST32(dst + 4, LD32(src + 4));
        dst[8] = src[8];

        src += srcStride;
        dst += dstStride;
        row++;
    }
}
/* end not optimized */
#define QPEL_MC(r, OPNAME, RND, OP) \static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ uint8_t *cm = cropTbl + MAX_NEG_CROP;\ do {\ uint8_t *s = src; \ int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ src0= *s++;\ src1= *s++;\ src2= *s++;\ src3= *s++;\ src4= *s++;\ OP(dst[0], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ src5= *s++;\ OP(dst[1], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ src6= *s++;\ OP(dst[2], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ src7= *s++;\ OP(dst[3], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ src8= *s++;\ OP(dst[4], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ OP(dst[5], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ OP(dst[6], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ OP(dst[7], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ dst+=dstStride;\ src+=srcStride;\ }while(--h);\}\\static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ uint8_t *cm = cropTbl + MAX_NEG_CROP;\ int w=8;\ do{\ uint8_t *s = src, *d=dst;\ int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ src0 = *s; s+=srcStride; \ src1 = *s; s+=srcStride; \ src2 = *s; s+=srcStride; \ src3 = *s; s+=srcStride; \ src4 = *s; s+=srcStride; \ OP(*d, (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));d+=dstStride;\ src5 = *s; s+=srcStride; \ OP(*d, (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));d+=dstStride;\ src6 = *s; s+=srcStride; \ OP(*d, (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));d+=dstStride;\ src7 = *s; s+=srcStride; \ OP(*d, 
(src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));d+=dstStride;\ src8 = *s; \ OP(*d, (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));d+=dstStride;\ OP(*d, (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));d+=dstStride;\ OP(*d, (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));d+=dstStride;\ OP(*d, (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ dst++;\ src++;\ }while(--w);\}\\static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ uint8_t *cm = cropTbl + MAX_NEG_CROP;\ do {\ uint8_t *s = src;\ int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ int src9,src10,src11,src12,src13,src14,src15,src16;\ src0= *s++;\ src1= *s++;\ src2= *s++;\ src3= *s++;\ src4= *s++;\ OP(dst[ 0], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ src5= *s++;\ OP(dst[ 1], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ src6= *s++;\ OP(dst[ 2], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ src7= *s++;\ OP(dst[ 3], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ src8= *s++;\ OP(dst[ 4], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ src9= *s++;\ OP(dst[ 5], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ src10= *s++;\ OP(dst[ 6], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ src11= *s++;\ OP(dst[ 7], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ src12= *s++;\ OP(dst[ 8], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ src13= *s++;\ OP(dst[ 9], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ src14= *s++;\ OP(dst[10], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ src15= *s++;\ OP(dst[11], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ src16= *s++;\ OP(dst[12], (src12+src13)*20 - 
(src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ OP(dst[13], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ OP(dst[14], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ OP(dst[15], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ dst+=dstStride;\ src+=srcStride;\ }while(--h);\}\\static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ uint8_t *cm = cropTbl + MAX_NEG_CROP;\ int w=16;\ do {\ uint8_t *s = src, *d=dst;\ int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ int src9,src10,src11,src12,src13,src14,src15,src16;\ src0 = *s; s+=srcStride; \ src1 = *s; s+=srcStride; \ src2 = *s; s+=srcStride; \ src3 = *s; s+=srcStride; \ src4 = *s; s+=srcStride; \ OP(*d, (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));d+=dstStride;\ src5 = *s; s+=srcStride; \ OP(*d, (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));d+=dstStride;\ src6 = *s; s+=srcStride; \ OP(*d, (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));d+=dstStride;\ src7 = *s; s+=srcStride; \ OP(*d, (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));d+=dstStride;\ src8 = *s; s+=srcStride; \ OP(*d, (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));d+=dstStride;\ src9 = *s; s+=srcStride; \ OP(*d, (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));d+=dstStride;\ src10 = *s; s+=srcStride; \ OP(*d, (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));d+=dstStride;\ src11 = *s; s+=srcStride; \ OP(*d, (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));d+=dstStride;\ src12 = *s; s+=srcStride; \ OP(*d, (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));d+=dstStride;\ src13 = *s; s+=srcStride; \ OP(*d, (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));d+=dstStride;\ src14 = *s; s+=srcStride; \ OP(*d, 
(src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));d+=dstStride;\ src15 = *s; s+=srcStride; \ OP(*d, (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));d+=dstStride;\ src16 = *s; \ OP(*d, (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));d+=dstStride;\ OP(*d, (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));d+=dstStride;\ OP(*d, (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));d+=dstStride;\ OP(*d, (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ dst++;\ src++;\ }while(--w);\}\\static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ OPNAME ## pixels8_c(dst, src, stride, 8);\}\\static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t half[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\}\\static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\}\\static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t half[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\}\\static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t half[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\}\\static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ copy_block9(full, src, 16, stride, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\}\\static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t half[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## 
mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\}\static void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t halfH[72];\ uint8_t halfV[64];\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4_aligned(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\}\static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t halfH[72];\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\}\static void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t halfH[72];\ uint8_t halfV[64];\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4_aligned0(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\}\static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t halfH[72];\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\}\static void ff_ ## OPNAME ## 
qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t halfH[72];\ uint8_t halfV[64];\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4_aligned(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\}\static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t halfH[72];\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\}\static void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t halfH[72];\ uint8_t halfV[64];\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l4_aligned0(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\}\static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t halfH[72];\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\}\static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfH[72];\ uint8_t halfHV[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\}\static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfH[72];\ uint8_t halfHV[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -