📄 dsputil.c
字号:
+ (b&0x03030303UL)\ + 0x02020202UL;\ uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ uint32_t l1,h1;\\ pixels+=line_size;\ for(i=0; i<h; i+=2){\ uint32_t a= AV_RN32(pixels );\ uint32_t b= AV_RN32(pixels+1);\ l1= (a&0x03030303UL)\ + (b&0x03030303UL);\ h1= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ pixels+=line_size;\ block +=line_size;\ a= AV_RN32(pixels );\ b= AV_RN32(pixels+1);\ l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x02020202UL;\ h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ pixels+=line_size;\ block +=line_size;\ }\}\\static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\{\ int j;\ for(j=0; j<2; j++){\ int i;\ const uint32_t a= AV_RN32(pixels );\ const uint32_t b= AV_RN32(pixels+1);\ uint32_t l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x02020202UL;\ uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ uint32_t l1,h1;\\ pixels+=line_size;\ for(i=0; i<h; i+=2){\ uint32_t a= AV_RN32(pixels );\ uint32_t b= AV_RN32(pixels+1);\ l1= (a&0x03030303UL)\ + (b&0x03030303UL);\ h1= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ pixels+=line_size;\ block +=line_size;\ a= AV_RN32(pixels );\ b= AV_RN32(pixels+1);\ l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x02020202UL;\ h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ pixels+=line_size;\ block +=line_size;\ }\ pixels+=4-line_size*(h+1);\ block +=4-line_size*h;\ }\}\\static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\{\ int j;\ for(j=0; j<2; j++){\ int i;\ const uint32_t a= AV_RN32(pixels );\ const uint32_t b= AV_RN32(pixels+1);\ uint32_t l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x01010101UL;\ uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ uint32_t l1,h1;\\ pixels+=line_size;\ for(i=0; i<h; i+=2){\ uint32_t a= AV_RN32(pixels );\ uint32_t b= AV_RN32(pixels+1);\ l1= (a&0x03030303UL)\ + (b&0x03030303UL);\ h1= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ pixels+=line_size;\ block +=line_size;\ a= AV_RN32(pixels );\ b= AV_RN32(pixels+1);\ l0= (a&0x03030303UL)\ + (b&0x03030303UL)\ + 0x01010101UL;\ h0= ((a&0xFCFCFCFCUL)>>2)\ + ((b&0xFCFCFCFCUL)>>2);\ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ pixels+=line_size;\ block +=line_size;\ }\ pixels+=4-line_size*(h+1);\ block +=4-line_size*h;\ }\}\\CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\#define op_avg(a, b) a = rnd_avg32(a, b)#endif#define op_put(a, b) a = bPIXOP2(avg, op_avg)PIXOP2(put, op_put)#undef op_avg#undef op_put#define avg2(a,b) ((a+b+1)>>1)#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)#ifdef JZ4740_MXU_OPT#define put_pixels2_mxu put_pixels2_c#define avg_pixels2_mxu avg_pixels2_c#define put_pixels2_xy2_mxu put_pixels2_xy2_c#define avg_pixels2_xy2_mxu avg_pixels2_xy2_c#define put_pixels4_xy2_mxu put_pixels4_xy2_c#define avg_pixels4_xy2_mxu avg_pixels4_xy2_cstatic void put_pixels4_mxu(uint8_t *dst, const uint8_t *src, int stride, int h){ uint32_t src_aln,src_rs; int i; src_aln = (uint32_t)src & 0xfffffffc; src_rs = 4 - ((uint32_t)src & 3); dst -= stride; for(i=0; i<h; i++) { S32LDD(xr1,src_aln,0); S32LDD(xr2,src_aln,4); src_aln += stride; S32ALN(xr1,xr2,xr1,src_rs); //xr1 <- src[3:0] S32SDIV(xr1,dst,stride,0); }}static void avg_pixels4_mxu(uint8_t *dst, const uint8_t *src, int stride, int h){ uint32_t src_aln,src_rs; int i; src_aln = (uint32_t)src & 0xfffffffc; src_rs = 4 - ((uint32_t)src & 3); dst -= stride; src_aln -= stride; for(i=0; i<4; i++) { S32LDIV(xr1,src_aln,stride,0); S32LDD(xr2,src_aln,4); S32LDIV(xr3, dst, stride, 0); S32ALN(xr1,xr2,xr1,src_rs); //xr1 <- src[3:0] Q8AVGR(xr1,xr3,xr1); S32STD(xr1,dst,0); }}static void put_pixels8_mxu(uint8_t *dst, const uint8_t *src, int stride, int h){ uint32_t src_aln,src_rs; int i; src_aln = (uint32_t)src & 0xfffffffc; src_rs = 4 - ((uint32_t)src & 3); dst -= stride; src_aln -= stride; for(i=0; i<h; i++) { S32LDIV(xr1,src_aln,stride,0); S32LDD(xr2,src_aln,4); S32LDD(xr3,src_aln,8); S32ALN(xr1,xr2,xr1,src_rs); //xr1 <- src[3:0] S32ALN(xr2,xr3,xr2,src_rs); //xr2 <- src[7:4] S32SDIV(xr1,dst,stride,0); S32STD(xr2,dst,4); }}#define put_no_rnd_pixels8_mxu put_pixels8_mxustatic void avg_pixels8_mxu(uint8_t *dst, const uint8_t *src, int stride, int h){ uint32_t src_aln,src_rs; int i; src_aln = (uint32_t)src & 0xfffffffc; src_rs = 4 - ((uint32_t)src & 3); dst -= stride; src_aln -= stride; for(i=0; i<8; i++) { S32LDIV(xr1,src_aln,stride,0); S32LDD(xr2,src_aln,4); S32LDD(xr3,src_aln,8); S32LDIV(xr4, dst, stride, 0); S32LDD(xr5,dst,4); S32ALN(xr1,xr2,xr1,src_rs); //xr1 <- src[3:0] S32ALN(xr2,xr3,xr2,src_rs); //xr2 <- src[7:4] Q8AVGR(xr1,xr4,xr1); Q8AVGR(xr2,xr5,xr2); S32STD(xr1,dst,0); S32STD(xr2,dst,4); }}#define avg_no_rnd_pixels8_mxu avg_pixels8_mxustatic void put_pixels16_mxu(uint8_t *dst, const uint8_t *src, int stride, int h){ uint32_t src_aln,src_rs; int i; src_aln = (uint32_t)src & 0xfffffffc; src_rs = 4 - ((uint32_t)src & 3); dst -= stride; src_aln -= stride; for(i=0; i<h; i++) { S32LDIV(xr1,src_aln,stride,0); S32LDD(xr2,src_aln,4); S32LDD(xr3,src_aln,8); S32LDD(xr4,src_aln,12); S32LDD(xr5,src_aln,16); S32ALN(xr1,xr2,xr1,src_rs); //xr1 <- src[3:0] S32ALN(xr2,xr3,xr2,src_rs); //xr2 <- src[7:4] S32ALN(xr3,xr4,xr3,src_rs); //xr3 <- src[11:8] S32ALN(xr4,xr5,xr4,src_rs); //xr4 <- src[16:12] S32SDIV(xr1,dst,stride,0); S32STD(xr2,dst,4); S32STD(xr3,dst,8); S32STD(xr4,dst,12); }}#define put_no_rnd_pixels16_mxu put_pixels16_mxustatic void avg_pixels16_mxu(uint8_t *dst, const uint8_t *src, int stride, int h){ uint32_t src_aln,src_rs; int i; src_aln = (uint32_t)src & 0xfffffffc; src_rs = 4 - ((uint32_t)src & 3); dst -= stride; src_aln -= stride; for(i=0; i<h; i++) { S32LDIV(xr1,src_aln,stride,0); S32LDD(xr2,src_aln,4); S32LDD(xr3,src_aln,8); S32LDD(xr4,src_aln,12); S32LDD(xr5,src_aln,16); S32LDIV(xr6, dst, stride, 0); S32LDD(xr7,dst,4); S32LDD(xr8,dst,8); S32LDD(xr9,dst,12); S32ALN(xr1,xr2,xr1,src_rs); //xr1 <- src[3:0] S32ALN(xr2,xr3,xr2,src_rs); //xr2 <- src[7:4] S32ALN(xr3,xr4,xr3,src_rs); //xr3 <- src[11:8] S32ALN(xr4,xr5,xr4,src_rs); //xr2 <- src[16:12] Q8AVGR(xr1,xr6,xr1); Q8AVGR(xr2,xr7,xr2); Q8AVGR(xr3,xr8,xr3); Q8AVGR(xr4,xr9,xr4); S32STD(xr1,dst,0); S32STD(xr2,dst,4); S32STD(xr3,dst,8); S32STD(xr4,dst,12); }}#define avg_no_rnd_pixels16_mxu avg_pixels16_mxustatic void put_pixels4_l2_mxu(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h){ int i; uint32_t src_aln1, src_rs1, src_aln2, src_rs2; src_aln1 = (uint32_t)src1 & 0xfffffffc; src_rs1 = 4 - ((uint32_t)src1 & 3); src_aln2 = (uint32_t)src2 & 0xfffffffc; src_rs2 = 4 - ((uint32_t)src2 & 3); dst -= dst_stride; src_aln1 -= src_stride1; src_aln2 -= src_stride2; for(i=0; i<h; i++){ S32LDIV(xr1, src_aln1, src_stride1, 0); S32LDD(xr2, src_aln1, 4); S32LDIV(xr4, src_aln2, src_stride2, 0); S32LDD(xr5, src_aln2, 4); S32ALN(xr1, xr2, xr1, src_rs1); S32ALN(xr4, xr5, xr4, src_rs2); Q8AVGR(xr1, xr4, xr1); S32SDIV(xr1, dst, dst_stride, 0); }}static void avg_pixels4_l2_mxu(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h){ int i; uint32_t src_aln1, src_rs1, src_aln2, src_rs2; src_aln1 = (uint32_t)src1 & 0xfffffffc; src_rs1 = 4 - ((uint32_t)src1 & 3); src_aln2 = (uint32_t)src2 & 0xfffffffc; src_rs2 = 4 - ((uint32_t)src2 & 3); dst -= dst_stride; src_aln1 -= src_stride1; src_aln2 -= src_stride2; for(i=0; i<h; i++){ S32LDIV(xr1, src_aln1, src_stride1, 0); S32LDD(xr2, src_aln1, 4); S32LDIV(xr4, src_aln2, src_stride2, 0); S32LDD(xr5, src_aln2, 4); S32LDIV(xr3, dst, dst_stride, 0); S32ALN(xr1, xr2, xr1, src_rs1); S32ALN(xr4, xr5, xr4, src_rs2); Q8AVGR(xr1, xr4, xr1); Q8AVGR(xr1, xr3, xr1); S32STD(xr1, dst, 0); }}static void put_pixels8_l2_mxu(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h){ int i; uint32_t src_aln1, src_rs1, src_aln2, src_rs2; src_aln1 = (uint32_t)src1 & 0xfffffffc; src_rs1 = 4 - ((uint32_t)src1 & 3); src_aln2 = (uint32_t)src2 & 0xfffffffc; src_rs2 = 4 - ((uint32_t)src2 & 3); dst -= dst_stride; src_aln1 -= src_stride1; src_aln2 -= src_stride2; for(i=0; i<h; i++) { S32LDIV(xr1, src_aln1, src_stride1, 0); S32LDD(xr2, src_aln1, 4); S32LDD(xr3, src_aln1, 8); S32LDIV(xr4, src_aln2, src_stride2, 0); S32LDD(xr5, src_aln2, 4); S32LDD(xr6, src_aln2, 8);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -