📄 qpel.c
字号:
OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ src1+=src_stride1;\ src2+=src_stride2;\ src3+=src_stride3;\ src4+=src_stride4;\ dst+=dst_stride;\ } while(--h); \} \\static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ do { /* src1 is unaligned */\ uint32_t a0,a1,a2,a3; \ UNPACK(a0,a1,LD32(src1),LP(src2)); \ UNPACK(a2,a3,LP(src3),LP(src4)); \ OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \ UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \ UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ src1+=src_stride1;\ src2+=src_stride2;\ src3+=src_stride3;\ src4+=src_stride4;\ dst+=dst_stride;\ } while(--h); \} \\static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ do { \ uint32_t a0,a1,a2,a3; \ UNPACK(a0,a1,LD32(src1),LP(src2)); \ UNPACK(a2,a3,LP(src3),LP(src4)); \ OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \ UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \ UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ src1+=src_stride1;\ src2+=src_stride2;\ src3+=src_stride3;\ src4+=src_stride4;\ dst+=dst_stride;\ } while(--h); \} \\#define op_avg(a, b) a = rnd_avg32(a,b)#define op_put(a, b) a = bPIXOP2(avg, op_avg)PIXOP2(put, op_put)#undef 
op_avg#undef op_put#define avg2(a,b) ((a+b+1)>>1)#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder){ const int A=(16-x16)*(16-y16); const int B=( x16)*(16-y16); const int C=(16-x16)*( y16); const int D=( x16)*( y16); do { int t0,t1,t2,t3; uint8_t *s0 = src; uint8_t *s1 = src+stride; t0 = *s0++; t2 = *s1++; t1 = *s0++; t3 = *s1++; dst[0]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; t0 = *s0++; t2 = *s1++; dst[1]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; t1 = *s0++; t3 = *s1++; dst[2]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; t0 = *s0++; t2 = *s1++; dst[3]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; t1 = *s0++; t3 = *s1++; dst[4]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; t0 = *s0++; t2 = *s1++; dst[5]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; t1 = *s0++; t3 = *s1++; dst[6]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; t0 = *s0++; t2 = *s1++; dst[7]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; dst+= stride; src+= stride; }while(--h);}static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height){ int y, vx, vy; const int s= 1<<shift; width--; height--; for(y=0; y<h; y++){ int x; vx= ox; vy= oy; for(x=0; x<8; x++){ //XXX FIXME optimize int src_x, src_y, frac_x, frac_y, index; src_x= vx>>16; src_y= vy>>16; frac_x= src_x&(s-1); frac_y= src_y&(s-1); src_x>>=shift; src_y>>=shift; if((unsigned)src_x < width){ if((unsigned)src_y < height){ index= src_x + src_y*stride; dst[y*stride + x]= ( ( src[index ]*(s-frac_x) + src[index +1]* frac_x )*(s-frac_y) + ( src[index+stride ]*(s-frac_x) + src[index+stride+1]* frac_x )* frac_y + r)>>(shift*2); }else{ index= src_x + clip(src_y, 0, height)*stride; dst[y*stride + x]= ( ( src[index ]*(s-frac_x) + src[index +1]* frac_x )*s + r)>>(shift*2); } }else{ if((unsigned)src_y < height){ index= clip(src_x, 0, width) + src_y*stride; dst[y*stride + x]= ( ( src[index 
]*(s-frac_y) + src[index+stride ]* frac_y )*s + r)>>(shift*2); }else{ index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride; dst[y*stride + x]= src[index ]; } } vx+= dxx; vy+= dyx; } ox += dxy; oy += dyy; }}#define H264_CHROMA_MC(OPNAME, OP)\static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ const int A=(8-x)*(8-y);\ const int B=( x)*(8-y);\ const int C=(8-x)*( y);\ const int D=( x)*( y);\ \ assert(x<8 && y<8 && x>=0 && y>=0);\\ do {\ int t0,t1,t2,t3; \ uint8_t *s0 = src; \ uint8_t *s1 = src+stride; \ t0 = *s0++; t2 = *s1++; \ t1 = *s0++; t3 = *s1++; \ OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ t0 = *s0++; t2 = *s1++; \ OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ dst+= stride;\ src+= stride;\ }while(--h);\}\\static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ const int A=(8-x)*(8-y);\ const int B=( x)*(8-y);\ const int C=(8-x)*( y);\ const int D=( x)*( y);\ \ assert(x<8 && y<8 && x>=0 && y>=0);\\ do {\ int t0,t1,t2,t3; \ uint8_t *s0 = src; \ uint8_t *s1 = src+stride; \ t0 = *s0++; t2 = *s1++; \ t1 = *s0++; t3 = *s1++; \ OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ t0 = *s0++; t2 = *s1++; \ OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ t1 = *s0++; t3 = *s1++; \ OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\ t0 = *s0++; t2 = *s1++; \ OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\ dst+= stride;\ src+= stride;\ }while(--h);\}\\static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ const int A=(8-x)*(8-y);\ const int B=( x)*(8-y);\ const int C=(8-x)*( y);\ const int D=( x)*( y);\ \ assert(x<8 && y<8 && x>=0 && y>=0);\\ do {\ int t0,t1,t2,t3; \ uint8_t *s0 = src; \ uint8_t *s1 = src+stride; \ t0 = *s0++; t2 = *s1++; \ t1 = *s0++; t3 = *s1++; \ OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ t0 = *s0++; t2 = *s1++; \ OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ t1 = *s0++; t3 = 
*s1++; \
        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
        t0 = *s0++; t2 = *s1++; \
        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
        t1 = *s0++; t3 = *s1++; \
        OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\
        t0 = *s0++; t2 = *s1++; \
        OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\
        t1 = *s0++; t3 = *s1++; \
        OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\
        t0 = *s0++; t2 = *s1++; \
        OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\
        dst+= stride;\
        src+= stride;\
    }while(--h);\
}

/*
 * Store operations handed to H264_CHROMA_MC as OP. Both first normalise the
 * weighted sum b by adding 32 and shifting right by 6.
 * op_put stores that value; op_avg averages it with the value already in a,
 * rounding up (+1, >>1).
 */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

/* Instantiate put_h264_chroma_mc{2,4,8}_c and avg_h264_chroma_mc{2,4,8}_c. */
H264_CHROMA_MC(put_ , op_put)
H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put

/* not yet optimized */

/*
 * Copies h rows of 4 bytes from src to dst with one 32-bit LD32/ST32 pair per
 * row, advancing dst by dstStride and src by srcStride after each row.
 * Nothing is copied when h <= 0. LD32/ST32 are defined outside this view;
 * presumably they tolerate unaligned addresses - confirm.
 */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    int i;
    for(i=0; i<h; i++) {
        ST32(dst , LD32(src ));
        dst+=dstStride;
        src+=srcStride;
    }
}

/*
 * Same as copy_block4 but copies 8 bytes per row, as two 32-bit words at
 * byte offsets 0 and 4.
 */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    int i;
    for(i=0; i<h; i++) {
        ST32(dst , LD32(src ));
        ST32(dst+4 , LD32(src+4 ));
        dst+=dstStride;
        src+=srcStride;
    }
}

/*
 * Same as copy_block4 but copies 16 bytes per row, as four 32-bit words at
 * byte offsets 0, 4, 8 and 12.
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    int i;
    for(i=0; i<h; i++) {
        ST32(dst , LD32(src ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst+=dstStride;
        src+=srcStride;
    }
}

static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    int i;
    for(i=0; i<h; i++) {
        ST32(dst , LD32(src ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst[16]= src[16];
        dst+=dstStride;
        src+=srcStride;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -