📄 qpel.c

📁 ffmpeg源码分析
💻 C
📖 第 1 页 / 共 5 页
字号:
                OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \                src1+=src_stride1;\                src2+=src_stride2;\                src3+=src_stride3;\                src4+=src_stride4;\                dst+=dst_stride;\        } while(--h); \} \\static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\        do { /* src1 is unaligned */\                uint32_t a0,a1,a2,a3; \                UNPACK(a0,a1,LD32(src1),LP(src2)); \                UNPACK(a2,a3,LP(src3),LP(src4)); \                OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \                UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \                UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \                OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \                UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \                UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \                OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \                UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \                UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \                OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \                src1+=src_stride1;\                src2+=src_stride2;\                src3+=src_stride3;\                src4+=src_stride4;\                dst+=dst_stride;\        } while(--h); \} \\static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\        do { \                uint32_t a0,a1,a2,a3; \                UNPACK(a0,a1,LD32(src1),LP(src2)); \                UNPACK(a2,a3,LP(src3),LP(src4)); \                OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \                UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \                UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \                OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \                UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \                UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \                OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \                UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \                UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \                OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \                src1+=src_stride1;\                src2+=src_stride2;\                src3+=src_stride3;\                src4+=src_stride4;\                dst+=dst_stride;\        } while(--h); \} \\#define op_avg(a, b) a = rnd_avg32(a,b)#define op_put(a, b) a = bPIXOP2(avg, op_avg)PIXOP2(put, op_put)#undef op_avg#undef op_put#define avg2(a,b) ((a+b+1)>>1)#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder){    const int A=(16-x16)*(16-y16);    const int B=(   x16)*(16-y16);    const int C=(16-x16)*(   y16);    const int D=(   x16)*(   y16);    do {        int t0,t1,t2,t3;        uint8_t *s0 = src;        uint8_t *s1 = src+stride;        t0 = *s0++; t2 = *s1++;        t1 = *s0++; t3 = *s1++;        dst[0]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;        t0 = *s0++; t2 = *s1++;        dst[1]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;        t1 = *s0++; t3 = *s1++;        dst[2]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;        t0 = *s0++; t2 = *s1++;        dst[3]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;        t1 = *s0++; t3 = *s1++;        dst[4]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;        t0 = *s0++; t2 = *s1++;        dst[5]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;        t1 = *s0++; t3 = *s1++;        dst[6]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;        t0 = *s0++; t2 = *s1++;        dst[7]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;        dst+= stride;        src+= stride;    }while(--h);}static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height){    int y, vx, vy;    const int s= 1<<shift;    width--;    height--;    for(y=0; y<h; y++){        int x;        vx= ox;        vy= oy;        for(x=0; x<8; x++){ //XXX FIXME optimize            int src_x, src_y, frac_x, frac_y, index;            src_x= vx>>16;            src_y= vy>>16;            frac_x= src_x&(s-1);            frac_y= src_y&(s-1);            src_x>>=shift;            src_y>>=shift;            if((unsigned)src_x < width){                if((unsigned)src_y < height){                    index= src_x + src_y*stride;                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)                                           + src[index       +1]*   frac_x )*(s-frac_y)                                        + (  src[index+stride  ]*(s-frac_x)                                           + src[index+stride+1]*   frac_x )*   frac_y                                        + r)>>(shift*2);                }else{                    index= src_x + clip(src_y, 0, height)*stride;                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)                                          + src[index       +1]*   frac_x )*s                                        + r)>>(shift*2);                }            }else{                if((unsigned)src_y < height){                    index= clip(src_x, 0, width) + src_y*stride;                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)                                           + src[index+stride  ]*   frac_y )*s                                        + r)>>(shift*2);                }else{                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;                    dst[y*stride + x]=    src[index         ];                }            }            vx+= dxx;            vy+= dyx;        }        ox += dxy;        oy += dyy;    }}#define H264_CHROMA_MC(OPNAME, OP)\static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\    const int A=(8-x)*(8-y);\    const int B=(  x)*(8-y);\    const int C=(8-x)*(  y);\    const int D=(  x)*(  y);\    \    assert(x<8 && y<8 && x>=0 && y>=0);\\    do {\        int t0,t1,t2,t3; \        uint8_t *s0 = src; \        uint8_t *s1 = src+stride; \        t0 = *s0++; t2 = *s1++; \        t1 = *s0++; t3 = *s1++; \        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\        t0 = *s0++; t2 = *s1++; \        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\        dst+= stride;\        src+= stride;\    }while(--h);\}\\static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\    const int A=(8-x)*(8-y);\    const int B=(  x)*(8-y);\    const int C=(8-x)*(  y);\    const int D=(  x)*(  y);\    \    assert(x<8 && y<8 && x>=0 && y>=0);\\    do {\        int t0,t1,t2,t3; \        uint8_t *s0 = src; \        uint8_t *s1 = src+stride; \        t0 = *s0++; t2 = *s1++; \        t1 = *s0++; t3 = *s1++; \        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\        t0 = *s0++; t2 = *s1++; \        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\        t1 = *s0++; t3 = *s1++; \        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\        t0 = *s0++; t2 = *s1++; \        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\        dst+= stride;\        src+= stride;\    }while(--h);\}\\static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\    const int A=(8-x)*(8-y);\    const int B=(  x)*(8-y);\    const int C=(8-x)*(  y);\    const int D=(  x)*(  y);\    \    assert(x<8 && y<8 && x>=0 && y>=0);\\    do {\        int t0,t1,t2,t3; \        uint8_t *s0 = src; \        uint8_t *s1 = src+stride; \        t0 = *s0++; t2 = *s1++; \        t1 = *s0++; t3 = *s1++; \        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\        t0 = *s0++; t2 = *s1++; \        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\        t1 = *s0++; t3 = *s1++; \        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\        t0 = *s0++; t2 = *s1++; \        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\        t1 = *s0++; t3 = *s1++; \        OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\        t0 = *s0++; t2 = *s1++; \        OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\        t1 = *s0++; t3 = *s1++; \        OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\        t0 = *s0++; t2 = *s1++; \        OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\        dst+= stride;\        src+= stride;\    }while(--h);\}#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)#define op_put(a, b) a = (((b) + 32)>>6)H264_CHROMA_MC(put_       , op_put)H264_CHROMA_MC(avg_       , op_avg)#undef op_avg#undef op_put/* not yet optimized */static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){    int i;    for(i=0; i<h; i++)    {        ST32(dst   , LD32(src   ));        dst+=dstStride;        src+=srcStride;    }}static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){    int i;    for(i=0; i<h; i++)    {        ST32(dst   , LD32(src   ));        ST32(dst+4 , LD32(src+4 ));        dst+=dstStride;        src+=srcStride;    }}static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){    int i;    for(i=0; i<h; i++)    {        ST32(dst   , LD32(src   ));        ST32(dst+4 , LD32(src+4 ));        ST32(dst+8 , LD32(src+8 ));        ST32(dst+12, LD32(src+12));        dst+=dstStride;        src+=srcStride;    }}static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){    int i;    for(i=0; i<h; i++)    {        ST32(dst   , LD32(src   ));        ST32(dst+4 , LD32(src+4 ));        ST32(dst+8 , LD32(src+8 ));        ST32(dst+12, LD32(src+12));        dst[16]= src[16];        dst+=dstStride;        src+=srcStride;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -