📄 qpel.c
字号:
} \
\
/* Combines four 16-byte-wide sources into dst, one row per iteration, for  */ \
/* h rows. Each row is processed as four 32-bit words at byte offsets       */ \
/* 0, 4, 8 and 12: the words are split with UNPACK, merged with rnd_PACK    */ \
/* and written through OP (all defined elsewhere; presumably a rounded      */ \
/* four-way average - confirm against their definitions).                   */ \
/* src1 is unaligned and is therefore read with LD32; src2..src4 and dst    */ \
/* are accessed through LP.                                                 */ \
/* The do/while runs the body before testing --h, so h must be at least 1.  */ \
static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { /* src1 is unaligned */\
        uint32_t a0,a1,a2,a3; \
        /* word 0: bytes 0..3 */ \
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        /* word 1: bytes 4..7 (destination offset must be 4, matching the */ \
        /* source offset and the _no_rnd_ variant of this function)       */ \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
        /* word 2: bytes 8..11 */ \
        UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
        UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
        OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
        /* word 3: bytes 12..15 */ \
        UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
        UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
        OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
        /* advance every pointer by its own stride to the next row */ \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
/* Variant of the 16-byte-wide four-source combiner that merges with        */ \
/* no_rnd_PACK instead of rnd_PACK (both defined elsewhere).                */ \
/* Every row is handled as four 32-bit words at byte offsets 0, 4, 8, 12;   */ \
/* src1 is read with LD32, src2..src4 and dst are accessed through LP.      */ \
/* The do/while runs the body before testing --h, so h must be at least 1.  */ \
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        int off; \
        /* walk the row one 32-bit word at a time, left to right */ \
        for (off = 0; off < 16; off += 4) { \
            uint32_t p0,p1,p2,p3; \
            UNPACK(p0,p1,LD32(src1+off),LP(src2+off)); \
            UNPACK(p2,p3,LP(src3+off),LP(src4+off)); \
            OP(LP(dst+off),no_rnd_PACK(p0,p1,p2,p3)); \
        } \
        /* step all five pointers down to the next row */ \
        dst+=dst_stride;\
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
    } while(--h); \
} \
\
/* Instantiate the PIXOP2 function family twice, once per store operator:
 *   op_avg(a, b) stores rnd_avg32(a, b) into a (rnd_avg32 is defined
 *                elsewhere; presumably a rounded per-byte average of the
 *                old destination word and the new value - TODO confirm);
 *   op_put(a, b) stores b into a unchanged.
 * The generated functions are prefixed "avg" and "put" respectively.
 * Both operator macros are undefined afterwards so the names can be
 * redefined later in the file. */
#define op_avg(a, b) a = rnd_avg32(a,b)
#define op_put(a, b) a = b
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
/* Rounded integer averages of two and four values.
 * Every argument is parenthesized so that operands containing operators of
 * lower precedence than '+' (e.g. |, &, <<, ?:) are evaluated as a unit.
 * Arguments are still evaluated exactly once each. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
/**
 * Bilinear interpolation of an 8-pixel-wide block, h rows tall.
 *
 * Each output pixel is a weighted sum of the source pixel, its right
 * neighbour, and the two pixels directly below them. The four weights are
 * built from x16 and y16 as products of (16 - v) and v, so they sum to 256;
 * the sum plus rounder is normalized with a right shift by 8.
 *
 * @param dst     destination; 8 bytes are written per row
 * @param src     source; 9 bytes are read from each of rows r and r+1
 * @param stride  distance in bytes between rows, used for both dst and src
 * @param h       number of rows; the do/while executes the body before
 *                testing --h, so h must be at least 1
 * @param x16     horizontal weight of the right-hand samples
 * @param y16     vertical weight of the lower samples
 * @param rounder value added before the final shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_top_left  = (16 - x16) * (16 - y16);
    const int w_top_right = x16 * (16 - y16);
    const int w_bot_left  = (16 - x16) * y16;
    const int w_bot_right = x16 * y16;

    do {
        const uint8_t *upper = src;
        const uint8_t *lower = src + stride;
        /* The right-hand pair of one pixel is the left-hand pair of the
         * next, so each source byte is loaded only once per row. */
        int left_up = upper[0];
        int left_lo = lower[0];
        int col;

        for (col = 0; col < 8; col++) {
            const int right_up = upper[col + 1];
            const int right_lo = lower[col + 1];

            dst[col] = (w_top_left * left_up + w_top_right * right_up
                        + w_bot_left * left_lo + w_bot_right * right_lo
                        + rounder) >> 8;

            left_up = right_up;
            left_lo = right_lo;
        }

        src += stride;
        dst += stride;
    } while (--h);
}
/**
 * Samples an 8-pixel-wide, h-row block from src along an affine path and
 * writes it to dst.
 *
 * The sampling position starts at (ox, oy) for each row and advances by
 * (dxx, dyx) per output column and by (dxy, dyy) per output row. Positions
 * are shifted right by 16, then split into a fraction (low `shift` bits)
 * and an integer sample coordinate.
 *
 * A coordinate counts as inside when, viewed as unsigned, it is below
 * width-1 (resp. height-1). Depending on which coordinates are inside, the
 * pixel is interpolated in both directions, in one direction only (the
 * other coordinate clamped with clip()), or copied from the clamped
 * position. Interpolated results add r and are shifted right by 2*shift.
 *
 * @param dst    destination, addressed as dst[y*stride + x]
 * @param src    source picture
 * @param stride row distance in bytes, used for both dst and src
 * @param h      number of output rows
 * @param r      rounding term added before the final shift
 * @param width  source width; width-1 is the largest clamped x coordinate
 * @param height source height; height-1 is the largest clamped y coordinate
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int one       = 1 << shift;   /* weight representing a full sample */
    const int frac_mask = one - 1;
    const int norm      = shift * 2;
    const int max_x     = width - 1;
    const int max_y     = height - 1;
    int row;

    for (row = 0; row < h; row++) {
        int pos_x = ox;
        int pos_y = oy;
        int col;

        for (col = 0; col < 8; col++) { /* XXX FIXME optimize */
            int sx = pos_x >> 16;
            int sy = pos_y >> 16;
            const int fx = sx & frac_mask;
            const int fy = sy & frac_mask;
            int x_inside, y_inside, idx;

            sx >>= shift;
            sy >>= shift;

            /* unsigned compare rejects negative coordinates as well */
            x_inside = (unsigned)sx < max_x;
            y_inside = (unsigned)sy < max_y;

            if (x_inside && y_inside) {
                /* full bilinear interpolation */
                int top, bottom;
                idx    = sx + sy * stride;
                top    = src[idx] * (one - fx) + src[idx + 1] * fx;
                bottom = src[idx + stride] * (one - fx) + src[idx + stride + 1] * fx;
                dst[row * stride + col] = (top * (one - fy) + bottom * fy + r) >> norm;
            } else if (x_inside) {
                /* row clamped: interpolate horizontally only */
                idx = sx + clip(sy, 0, max_y) * stride;
                dst[row * stride + col] =
                    ((src[idx] * (one - fx) + src[idx + 1] * fx) * one + r) >> norm;
            } else if (y_inside) {
                /* column clamped: interpolate vertically only */
                idx = clip(sx, 0, max_x) + sy * stride;
                dst[row * stride + col] =
                    ((src[idx] * (one - fy) + src[idx + stride] * fy) * one + r) >> norm;
            } else {
                /* both clamped: plain copy */
                idx = clip(sx, 0, max_x) + clip(sy, 0, max_y) * stride;
                dst[row * stride + col] = src[idx];
            }

            pos_x += dxx;
            pos_y += dyx;
        }

        ox += dxy;
        oy += dyy;
    }
}
#define H264_CHROMA_MC(OPNAME, OP)\
/* Generates three bilinear chroma interpolators, 2, 4 and 8 pixels wide. */\
/* For each output pixel the source pixel, its right neighbour and the    */\
/* two pixels one row below are weighted with products of (8-x), x,       */\
/* (8-y) and y (the weights sum to 64) and the unnormalized sum is handed */\
/* to OP together with the destination pixel.                             */\
/* Each row reads width+1 bytes from two consecutive source rows.         */\
/* h must be at least 1: the do/while tests --h after the body.           */\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int w00=(8-x)*(8-y);\
    const int w01=(  x)*(8-y);\
    const int w10=(8-x)*(  y);\
    const int w11=(  x)*(  y);\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    do {\
        const uint8_t *row0 = src;\
        const uint8_t *row1 = src+stride;\
        /* right-hand samples of one pixel become left-hand samples of the next */\
        int left0 = row0[0];\
        int left1 = row1[0];\
        int col;\
        for (col = 0; col < 2; col++) {\
            const int right0 = row0[col+1];\
            const int right1 = row1[col+1];\
            OP(dst[col], (w00*left0 + w01*right0 + w10*left1 + w11*right1));\
            left0 = right0;\
            left1 = right1;\
        }\
        src+= stride;\
        dst+= stride;\
    }while(--h);\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int w00=(8-x)*(8-y);\
    const int w01=(  x)*(8-y);\
    const int w10=(8-x)*(  y);\
    const int w11=(  x)*(  y);\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    do {\
        const uint8_t *row0 = src;\
        const uint8_t *row1 = src+stride;\
        int left0 = row0[0];\
        int left1 = row1[0];\
        int col;\
        for (col = 0; col < 4; col++) {\
            const int right0 = row0[col+1];\
            const int right1 = row1[col+1];\
            OP(dst[col], (w00*left0 + w01*right0 + w10*left1 + w11*right1));\
            left0 = right0;\
            left1 = right1;\
        }\
        src+= stride;\
        dst+= stride;\
    }while(--h);\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int w00=(8-x)*(8-y);\
    const int w01=(  x)*(8-y);\
    const int w10=(8-x)*(  y);\
    const int w11=(  x)*(  y);\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    do {\
        const uint8_t *row0 = src;\
        const uint8_t *row1 = src+stride;\
        int left0 = row0[0];\
        int left1 = row1[0];\
        int col;\
        for (col = 0; col < 8; col++) {\
            const int right0 = row0[col+1];\
            const int right1 = row1[col+1];\
            OP(dst[col], (w00*left0 + w01*right0 + w10*left1 + w11*right1));\
            left0 = right0;\
            left1 = right1;\
        }\
        src+= stride;\
        dst+= stride;\
    }while(--h);\
}
/* Store operators for the generated functions. Both normalize the weighted
 * sum b with rounding, (b + 32) >> 6; op_put stores that value, op_avg
 * stores the rounded-up mean of it and the value already in a. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)
H264_CHROMA_MC(put_ , op_put)
H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put
/* not yet optimized */
/**
 * Copies a block that is 4 bytes wide and h rows tall.
 *
 * Each row is moved with a single LD32/ST32 pair (macros defined elsewhere;
 * presumably a 32-bit load and store - confirm against their definitions).
 * Nothing is copied when h <= 0.
 *
 * @param dst       destination of the first row
 * @param src       source of the first row
 * @param dstStride byte distance between destination rows
 * @param srcStride byte distance between source rows
 * @param h         number of rows
 */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        ST32(dst, LD32(src));
        src += srcStride;
        dst += dstStride;
    }
}
/**
 * Copies a block that is 8 bytes wide and h rows tall.
 *
 * Each row is moved as two words at byte offsets 0 and 4 using LD32/ST32
 * (macros defined elsewhere; presumably a 32-bit load and store - confirm
 * against their definitions). Nothing is copied when h <= 0.
 *
 * @param dst       destination of the first row
 * @param src       source of the first row
 * @param dstStride byte distance between destination rows
 * @param srcStride byte distance between source rows
 * @param h         number of rows
 */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 8; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
        src += srcStride;
        dst += dstStride;
    }
}
/**
 * Copies a block that is 16 bytes wide and h rows tall.
 *
 * Each row is moved as four words at byte offsets 0, 4, 8 and 12 using
 * LD32/ST32 (macros defined elsewhere; presumably a 32-bit load and store -
 * confirm against their definitions). Nothing is copied when h <= 0.
 *
 * @param dst       destination of the first row
 * @param src       source of the first row
 * @param dstStride byte distance between destination rows
 * @param srcStride byte distance between source rows
 * @param h         number of rows
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 16; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
        src += srcStride;
        dst += dstStride;
    }
}
/**
 * Copies a block that is 17 bytes wide and h rows tall.
 *
 * The first 16 bytes of each row are moved as four words at byte offsets
 * 0, 4, 8 and 12 using LD32/ST32 (macros defined elsewhere; presumably a
 * 32-bit load and store - confirm against their definitions); the 17th
 * byte is then copied on its own. Nothing is copied when h <= 0.
 *
 * @param dst       destination of the first row
 * @param src       source of the first row
 * @param dstStride byte distance between destination rows
 * @param srcStride byte distance between source rows
 * @param h         number of rows
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 16; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
        dst[16] = src[16];   /* trailing odd byte */
        src += srcStride;
        dst += dstStride;
    }
}
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
{
ST32(dst , LD32(src ));
ST32(dst+4 , LD32(src+4 ));
dst[8]= src[8];
dst+=dstStride;
src+=srcStride;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -