📄 dsputil.c
字号:
\static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ int src_stride1, int src_stride2, int h){\ int i;\ for(i=0; i<h; i++){\ uint32_t a,b;\ a= LD32(&src1[i*src_stride1 ]);\ b= LD32(&src2[i*src_stride2 ]);\ OP(*((uint32_t*)&dst[i*dst_stride ]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\ a= LD32(&src1[i*src_stride1+4]);\ b= LD32(&src2[i*src_stride2+4]);\ OP(*((uint32_t*)&dst[i*dst_stride+4]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\ }\}\\static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ int src_stride1, int src_stride2, int h){\ int i;\ for(i=0; i<h; i++){\ uint32_t a,b;\ a= LD32(&src1[i*src_stride1 ]);\ b= LD32(&src2[i*src_stride2 ]);\ OP(*((uint32_t*)&dst[i*dst_stride ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ a= LD32(&src1[i*src_stride1+4]);\ b= LD32(&src2[i*src_stride2+4]);\ OP(*((uint32_t*)&dst[i*dst_stride+4]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ }\}\\static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ int src_stride1, int src_stride2, int h){\ OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\}\\static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ int src_stride1, int src_stride2, int h){\ OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\}\\static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\}\\static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\}\\static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\}\\static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\}\\static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ int i;\ for(i=0; i<h; i++){\ uint32_t a, b, c, d, l0, l1, h0, h1;\ a= LD32(&src1[i*src_stride1]);\ b= LD32(&src2[i*src_stride2]);\ c= LD32(&src3[i*src_stride3]);\ d= LD32(&src4[i*src_stride4]);\ l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x02020202UL;\ h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ l1= (c&0x03030303UL)\ + (d&0x03030303UL);\ h1= ((c&0xFCFCFCFCUL)>>2)\ + ((d&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ a= LD32(&src1[i*src_stride1+4]);\ b= LD32(&src2[i*src_stride2+4]);\ c= LD32(&src3[i*src_stride3+4]);\ d= LD32(&src4[i*src_stride4+4]);\ l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x02020202UL;\ h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ l1= (c&0x03030303UL)\ + (d&0x03030303UL);\ h1= ((c&0xFCFCFCFCUL)>>2)\ + ((d&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ }\}\static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ int i;\ for(i=0; i<h; i++){\ uint32_t a, b, c, d, l0, l1, h0, h1;\ a= LD32(&src1[i*src_stride1]);\ b= LD32(&src2[i*src_stride2]);\ c= LD32(&src3[i*src_stride3]);\ d= LD32(&src4[i*src_stride4]);\ l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x01010101UL;\ h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ l1= (c&0x03030303UL)\ + (d&0x03030303UL);\ h1= ((c&0xFCFCFCFCUL)>>2)\ + ((d&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ a= LD32(&src1[i*src_stride1+4]);\ b= LD32(&src2[i*src_stride2+4]);\ c= LD32(&src3[i*src_stride3+4]);\ d= LD32(&src4[i*src_stride4+4]);\ l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x01010101UL;\ h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ l1= (c&0x03030303UL)\ + (d&0x03030303UL);\ h1= ((c&0xFCFCFCFCUL)>>2)\ + ((d&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ }\}\static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\}\static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\}\\static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\{\ int j;\ for(j=0; j<2; j++){\ int i;\ const uint32_t a= LD32(pixels );\ const uint32_t b= LD32(pixels+1);\ uint32_t l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x02020202UL;\ uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ uint32_t l1,h1;\\ pixels+=line_size;\ for(i=0; i<h; i+=2){\ uint32_t a= LD32(pixels );\ uint32_t b= LD32(pixels+1);\ l1= (a&0x03030303UL)\ + (b&0x03030303UL);\ h1= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ pixels+=line_size;\ block +=line_size;\ a= LD32(pixels );\ b= LD32(pixels+1);\ l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x02020202UL;\ h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ pixels+=line_size;\ block +=line_size;\ }\ pixels+=4-line_size*(h+1);\ block +=4-line_size*h;\ }\}\\static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\{\ int j;\ for(j=0; j<2; j++){\ int i;\ const uint32_t a= LD32(pixels );\ const uint32_t b= LD32(pixels+1);\ uint32_t l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x01010101UL;\ uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ uint32_t l1,h1;\\ pixels+=line_size;\ for(i=0; i<h; i+=2){\ uint32_t a= LD32(pixels );\ uint32_t b= LD32(pixels+1);\ l1= (a&0x03030303UL)\ + (b&0x03030303UL);\ h1= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ pixels+=line_size;\ block +=line_size;\ a= LD32(pixels );\ b= LD32(pixels+1);\ l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x01010101UL;\ h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ pixels+=line_size;\ block +=line_size;\ }\ pixels+=4-line_size*(h+1);\ block +=4-line_size*h;\ }\}\\CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )#endif#define op_put(a, b) a = bPIXOP2(avg, op_avg)PIXOP2(put, op_put)#undef op_avg#undef op_put#define avg2(a,b) ((a+b+1)>>1)#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder){ const int A=(16-x16)*(16-y16); const int B=( x16)*(16-y16); const int C=(16-x16)*( y16); const int D=( x16)*( y16); int i; for(i=0; i<h; i++) { dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8; dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8; dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8; dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8; dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8; dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8; dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; dst+= stride; src+= stride; }}static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height){ int y, vx, vy; const int s= 1<<shift; width--; height--; for(y=0; y<h; y++){ int x; vx= ox; vy= oy; for(x=0; x<8; x++){ //XXX FIXME optimize int src_x, src_y, frac_x, frac_y, index; src_x= vx>>16; src_y= vy>>16; frac_x= src_x&(s-1); frac_y= src_y&(s-1); src_x>>=shift; src_y>>=shift; if((unsigned)src_x < width){ if((unsigned)src_y < height){ index= src_x + src_y*stride; dst[y*stride + x]= ( ( src[index ]*(s-frac_x) + src[index +1]* frac_x )*(s-frac_y) + ( src[index+stride ]*(s-frac_x) + src[index+stride+1]* frac_x )* frac_y + r)>>(shift*2); }else{ index= src_x + clip(src_y, 0, height)*stride; dst[y*stride + x]= ( ( src[index ]*(s-frac_x) + src[index +1]* frac_x )*s + r)>>(shift*2); } }else{ if((unsigned)src_y < height){ index= clip(src_x, 0, width) + src_y*stride; dst[y*stride + x]= ( ( src[index ]*(s-frac_y) + src[index+stride ]* frac_y )*s + r)>>(shift*2); }else{ index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride; dst[y*stride + x]= src[index ]; } } vx+= dxx; vy+= dyx; } ox += dxy; oy += dyy; }}static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){ int i; for(i=0; i<h; i++) { ST32(dst , LD32(src )); ST32(dst+4 , LD32(src+4 )); ST32(dst+8 , LD32(src+8 )); ST32(dst+12, LD32(src+12)); dst[16]= src[16]; dst+=dstStride; src+=srcStride; }}static inline void copy_block9(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){ int i; for(i=0; i<h; i++) { ST32(dst , LD32(src )); ST32(dst+4 , LD32(src+4 )); dst[8]= src[8]; dst+=dstStride; src+=srcStride; }}#define QPEL_MC(r, OPNAME, RND, OP) \static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\ UINT8 *cm = cropTbl + MAX_NEG_CROP;\ int i;\ for(i=0; i<h; i++)\ {\ OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\ OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\ OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\ OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\ OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\ OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\ OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\ OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\ dst+=dstStride;\ src+=srcStride;\ }\}\\static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\ UINT8 *cm = cropTbl + MAX_NEG_CROP;\ int i;\ for(i=0; i<w; i++)\ {\ const int src0= src[0*srcStride];\ const int src1= src[1*srcStride];\ const int src2= src[2*srcStride];\ const int src3= src[3*srcStride];\ const int src4= src[4*srcStride];\ const int src5= src[5*srcStride];\ const int src6= src[6*srcStride];\ const int src7= src[7*srcStride];\ const int src8= src[8*srcStride];\ OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -