📄 dsputil.c
字号:
S32ALN(xr1, xr2, xr1, src_rs1); S32ALN(xr4, xr5, xr4, src_rs2); Q8AVGR(xr1, xr4, xr1); S32ALN(xr2, xr3, xr2, src_rs1); S32ALN(xr5, xr6, xr5, src_rs2); Q8AVGR(xr2, xr5, xr2); S32SDIV(xr1, dst, dst_stride, 0); S32STD(xr2, dst, 4); }}static void put_no_rnd_pixels8_l2_mxu(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h){ int i; uint32_t src_aln1, src_rs1, src_aln2, src_rs2; src_aln1 = (uint32_t)src1 & 0xfffffffc; src_rs1 = 4 - ((uint32_t)src1 & 3); src_aln2 = (uint32_t)src2 & 0xfffffffc; src_rs2 = 4 - ((uint32_t)src2 & 3); dst -= dst_stride; src_aln1 -= src_stride1; src_aln2 -= src_stride2; for(i=0; i<h; i++) { S32LDIV(xr1, src_aln1, src_stride1, 0); S32LDD(xr2, src_aln1, 4); S32LDD(xr3, src_aln1, 8); S32LDIV(xr4, src_aln2, src_stride2, 0); S32LDD(xr5, src_aln2, 4); S32LDD(xr6, src_aln2, 8); S32ALN(xr1, xr2, xr1, src_rs1); S32ALN(xr4, xr5, xr4, src_rs2); Q8AVG(xr1, xr4, xr1); S32ALN(xr2, xr3, xr2, src_rs1); S32ALN(xr5, xr6, xr5, src_rs2); Q8AVG(xr2, xr5, xr2); S32SDIV(xr1, dst, dst_stride, 0); S32STD(xr2, dst, 4); }}static void avg_pixels8_l2_mxu(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h){ int i; uint32_t src_aln1, src_rs1, src_aln2, src_rs2; src_aln1 = (uint32_t)src1 & 0xfffffffc; src_rs1 = 4 - ((uint32_t)src1 & 3); src_aln2 = (uint32_t)src2 & 0xfffffffc; src_rs2 = 4 - ((uint32_t)src2 & 3); dst -= dst_stride; src_aln1 -= src_stride1; src_aln2 -= src_stride2; for(i=0; i<h; i++) { S32LDIV(xr1, src_aln1, src_stride1, 0); S32LDD(xr2, src_aln1, 4); S32LDD(xr3, src_aln1, 8); S32LDIV(xr4, src_aln2, src_stride2, 0); S32LDD(xr5, src_aln2, 4); S32LDD(xr6, src_aln2, 8); S32LDIV(xr7, dst, dst_stride, 0); S32LDD(xr8, dst, 4); S32ALN(xr1, xr2, xr1, src_rs1); S32ALN(xr4, xr5, xr4, src_rs2); Q8AVGR(xr1, xr4, xr1); Q8AVGR(xr1, xr7, xr1); S32ALN(xr2, xr3, xr2, src_rs1); S32ALN(xr5, xr6, xr5, src_rs2); Q8AVGR(xr2, xr5, xr2); Q8AVGR(xr2, xr8, xr2); S32STD(xr1, dst, 0); S32STD(xr2, dst, 4); }}static void avg_no_rnd_pixels8_l2_mxu(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h){ int i; uint32_t src_aln1, src_rs1, src_aln2, src_rs2; src_aln1 = (uint32_t)src1 & 0xfffffffc; src_rs1 = 4 - ((uint32_t)src1 & 3); src_aln2 = (uint32_t)src2 & 0xfffffffc; src_rs2 = 4 - ((uint32_t)src2 & 3); dst -= dst_stride; src_aln1 -= src_stride1; src_aln2 -= src_stride2; for(i=0; i<h; i++) { S32LDIV(xr1, src_aln1, src_stride1, 0); S32LDD(xr2, src_aln1, 4); S32LDD(xr3, src_aln1, 8); S32LDIV(xr4, src_aln2, src_stride2, 0); S32LDD(xr5, src_aln2, 4); S32LDD(xr6, src_aln2, 8); S32LDIV(xr7, dst, dst_stride, 0); S32LDD(xr8, dst, 4); S32ALN(xr1, xr2, xr1, src_rs1); S32ALN(xr4, xr5, xr4, src_rs2); Q8AVG(xr1, xr4, xr1); Q8AVGR(xr1, xr7, xr1); S32ALN(xr2, xr3, xr2, src_rs1); S32ALN(xr5, xr6, xr5, src_rs2); Q8AVG(xr2, xr5, xr2); Q8AVGR(xr2, xr8, xr2); S32STD(xr1, dst, 0); S32STD(xr2, dst, 4); }}static void put_pixels8_xy2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ int i; uint32_t a= AV_RN32(pixels+0); uint32_t b= AV_RN32(pixels+1); uint32_t c= AV_RN32(pixels+4); uint32_t d= AV_RN32(pixels+5); uint32_t l0_0= (a&0x03030303UL) + (b&0x03030303UL) + 0x02020202UL; uint32_t h0_0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); uint32_t l1_0= (c&0x03030303UL) + (d&0x03030303UL) + 0x02020202UL; uint32_t h1_0= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); uint32_t l0_1 ,h0_1, l1_1, h1_1; pixels+=line_size; for(i=0; i<h; i+=2){ uint32_t a= AV_RN32(pixels+0); uint32_t b= AV_RN32(pixels+1); uint32_t c= AV_RN32(pixels+4); uint32_t d= AV_RN32(pixels+5); l0_1= (a&0x03030303UL) + (b&0x03030303UL); h0_1= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); l1_1= (c&0x03030303UL) + (d&0x03030303UL); h1_1= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); // OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)block) = h0_0+h0_1+(((l0_0+l0_1)>>2)&0x0F0F0F0FUL); *((uint32_t*)(block+4)) = h1_0+h1_1+(((l1_0+l1_1)>>2)&0x0F0F0F0FUL); pixels+=line_size; block +=line_size; a= AV_RN32(pixels+0); b= AV_RN32(pixels+1); c= AV_RN32(pixels+4); d= AV_RN32(pixels+5); l0_0= (a&0x03030303UL) + (b&0x03030303UL) + 0x02020202UL; h0_0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); l1_0= (c&0x03030303UL) + (d&0x03030303UL) + 0x02020202UL; h1_0= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); // OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)block) = h0_0+h0_1+(((l0_0+l0_1)>>2)&0x0F0F0F0FUL); *((uint32_t*)(block+4)) = h1_0+h1_1+(((l1_0+l1_1)>>2)&0x0F0F0F0FUL); pixels+=line_size; block +=line_size; }}static void avg_pixels8_xy2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ int i; uint32_t a= AV_RN32(pixels+0); uint32_t b= AV_RN32(pixels+1); uint32_t c= AV_RN32(pixels+4); uint32_t d= AV_RN32(pixels+5); uint32_t l0_0= (a&0x03030303UL) + (b&0x03030303UL) + 0x02020202UL; uint32_t h0_0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); uint32_t l1_0= (c&0x03030303UL) + (d&0x03030303UL) + 0x02020202UL; uint32_t h1_0= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); uint32_t l0_1 ,h0_1, l1_1, h1_1; pixels+=line_size; for(i=0; i<h; i+=2){ uint32_t a= AV_RN32(pixels+0); uint32_t b= AV_RN32(pixels+1); uint32_t c= AV_RN32(pixels+4); uint32_t d= AV_RN32(pixels+5); l0_1= (a&0x03030303UL) + (b&0x03030303UL); h0_1= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); l1_1= (c&0x03030303UL) + (d&0x03030303UL); h1_1= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); // OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)block) = rnd_avg32(*((uint32_t*)block), h0_0+h0_1+(((l0_0+l0_1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)(block+4)) = rnd_avg32(*((uint32_t*)(block+4)), h1_0+h1_1+(((l1_0+l1_1)>>2)&0x0F0F0F0FUL)); pixels+=line_size; block +=line_size; a= AV_RN32(pixels+0); b= AV_RN32(pixels+1); c= AV_RN32(pixels+4); d= AV_RN32(pixels+5); l0_0= (a&0x03030303UL) + (b&0x03030303UL) + 0x02020202UL; h0_0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); l1_0= (c&0x03030303UL) + (d&0x03030303UL) + 0x02020202UL; h1_0= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); // OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)block) = rnd_avg32(*((uint32_t*)block), h0_0+h0_1+(((l0_0+l0_1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)(block+4)) = rnd_avg32(*((uint32_t*)(block+4)), h1_0+h1_1+(((l1_0+l1_1)>>2)&0x0F0F0F0FUL)); pixels+=line_size; block +=line_size; }}static void put_no_rnd_pixels8_xy2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ int i; uint32_t a= AV_RN32(pixels+0); uint32_t b= AV_RN32(pixels+1); uint32_t c= AV_RN32(pixels+4); uint32_t d= AV_RN32(pixels+5); uint32_t l0_0= (a&0x03030303UL) + (b&0x03030303UL) + 0x01010101UL; uint32_t h0_0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); uint32_t l1_0= (c&0x03030303UL) + (d&0x03030303UL) + 0x01010101UL; uint32_t h1_0= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); uint32_t l0_1 ,h0_1, l1_1, h1_1; pixels+=line_size; for(i=0; i<h; i+=2){ uint32_t a= AV_RN32(pixels+0); uint32_t b= AV_RN32(pixels+1); uint32_t c= AV_RN32(pixels+4); uint32_t d= AV_RN32(pixels+5); l0_1= (a&0x03030303UL) + (b&0x03030303UL); h0_1= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); l1_1= (c&0x03030303UL) + (d&0x03030303UL); h1_1= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); // OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)block) = h0_0+h0_1+(((l0_0+l0_1)>>2)&0x0F0F0F0FUL); *((uint32_t*)(block+4)) = h1_0+h1_1+(((l1_0+l1_1)>>2)&0x0F0F0F0FUL); pixels+=line_size; block +=line_size; a= AV_RN32(pixels+0); b= AV_RN32(pixels+1); c= AV_RN32(pixels+4); d= AV_RN32(pixels+5); l0_0= (a&0x03030303UL) + (b&0x03030303UL) + 0x01010101UL; h0_0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); l1_0= (c&0x03030303UL) + (d&0x03030303UL) + 0x01010101UL; h1_0= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); // OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)block) = h0_0+h0_1+(((l0_0+l0_1)>>2)&0x0F0F0F0FUL); *((uint32_t*)(block+4)) = h1_0+h1_1+(((l1_0+l1_1)>>2)&0x0F0F0F0FUL); pixels+=line_size; block +=line_size; }}static void avg_no_rnd_pixels8_xy2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ int i; uint32_t a= AV_RN32(pixels+0); uint32_t b= AV_RN32(pixels+1); uint32_t c= AV_RN32(pixels+4); uint32_t d= AV_RN32(pixels+5); uint32_t l0_0= (a&0x03030303UL) + (b&0x03030303UL) + 0x01010101UL; uint32_t h0_0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); uint32_t l1_0= (c&0x03030303UL) + (d&0x03030303UL) + 0x01010101UL; uint32_t h1_0= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); uint32_t l0_1 ,h0_1, l1_1, h1_1; pixels+=line_size; for(i=0; i<h; i+=2){ uint32_t a= AV_RN32(pixels+0); uint32_t b= AV_RN32(pixels+1); uint32_t c= AV_RN32(pixels+4); uint32_t d= AV_RN32(pixels+5); l0_1= (a&0x03030303UL) + (b&0x03030303UL); h0_1= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); l1_1= (c&0x03030303UL) + (d&0x03030303UL); h1_1= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); // OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)block) = rnd_avg32(*((uint32_t*)block), h0_0+h0_1+(((l0_0+l0_1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)(block+4)) = rnd_avg32(*((uint32_t*)(block+4)), h1_0+h1_1+(((l1_0+l1_1)>>2)&0x0F0F0F0FUL)); pixels+=line_size; block +=line_size; a= AV_RN32(pixels+0); b= AV_RN32(pixels+1); c= AV_RN32(pixels+4); d= AV_RN32(pixels+5); l0_0= (a&0x03030303UL) + (b&0x03030303UL) + 0x01010101UL; h0_0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2); l1_0= (c&0x03030303UL) + (d&0x03030303UL) + 0x01010101UL; h1_0= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2); // OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)block) = rnd_avg32(*((uint32_t*)block), h0_0+h0_1+(((l0_0+l0_1)>>2)&0x0F0F0F0FUL)); *((uint32_t*)(block+4)) = rnd_avg32(*((uint32_t*)(block+4)), h1_0+h1_1+(((l1_0+l1_1)>>2)&0x0F0F0F0FUL)); pixels+=line_size; block +=line_size; }}static void put_pixels16_l2_mxu(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h){ put_pixels8_l2_mxu(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h); put_pixels8_l2_mxu(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);}static void avg_pixels16_l2_mxu(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h){ avg_pixels8_l2_mxu(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h); avg_pixels8_l2_mxu(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);}static void put_no_rnd_pixels16_l2_mxu(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h){ put_no_rnd_pixels8_l2_mxu(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h); put_no_rnd_pixels8_l2_mxu(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);}static void avg_no_rnd_pixels16_l2_mxu(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h){ avg_no_rnd_pixels8_l2_mxu(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h); avg_no_rnd_pixels8_l2_mxu(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);}static void put_pixels4_x2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ put_pixels4_l2_mxu(block, pixels, pixels+1, line_size, line_size, line_size, h);}static void put_pixels4_y2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ put_pixels4_l2_mxu(block, pixels, pixels+line_size, line_size, line_size, line_size, h);}static void put_pixels2_x2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ put_pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);}static void put_pixels2_y2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ put_pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);}static void avg_pixels4_x2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ avg_pixels4_l2_mxu(block, pixels, pixels+1, line_size, line_size, line_size, h);}static void avg_pixels4_y2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ avg_pixels4_l2_mxu(block, pixels, pixels+line_size, line_size, line_size, line_size, h);}static void avg_pixels2_x2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ avg_pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);}static void avg_pixels2_y2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ avg_pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);}static void put_no_rnd_pixels8_x2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ put_no_rnd_pixels8_l2_mxu(block, pixels, pixels+1, line_size, line_size, line_size, h);}static void avg_no_rnd_pixels8_x2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ avg_no_rnd_pixels8_l2_mxu(block, pixels, pixels+1, line_size, line_size, line_size, h);}static void put_pixels8_x2_mxu(uint8_t *block, const uint8_t *pixels, int line_size, int h){ put_pixels8_l2_mxu(block, pixels, pixels+1, line_size, line_size, line_size, h);}static void av
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -