📄 h264dsputil.c
字号:
Q16ADD_SS_WW (xr1, xr0, xr2, xr0); // xr1 = -xr2 -delta Q16ADD_SS_WW (xr11, xr0, xr12, xr0); // xr11 = -xr12 -delta Q8ACCE_AA (xr2, xr0, xr10, xr12); // (p0 + i_delta) Q8ACCE_AA (xr1, xr0, xr9, xr11); // (q0 - i_delta)// Q16SAT (xr2, xr2, xr12); Q16SAT (xr1, xr1, xr11); p0 = S32M2I (xr2); q0 = S32M2I (xr1);// pix[-1] = p0; pix[0] = q0; pix += ystride; pix[-1] = (p0 >> 8); pix[0] = (q0 >> 8); pix += ystride; pix[-1] = (p0 >> 16); pix[0] = (q0 >> 16); pix += ystride; pix[-1] = (p0 >> 24); pix[0] = (q0 >> 24); pix += ystride; }}#elsestatic inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0){ int i, d; for( i = 0; i < 4; i++ ) { const int tc = tc0[i]; if( tc <= 0 ) { pix += 2*ystride; continue; } for( d = 0; d < 2; d++ ) { const int p0 = pix[-1*xstride]; const int p1 = pix[-2*xstride]; const int q0 = pix[0]; const int q1 = pix[1*xstride]; if( FFABS( p0 - q0 ) < alpha && FFABS( p1 - p0 ) < beta && FFABS( q1 - q0 ) < beta ) { int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); pix[-xstride] = av_clip_uint8( p0 + delta ); /* p0' */ pix[0] = av_clip_uint8( q0 - delta ); /* q0' */ } pix += ystride; } }}static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0){ h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);}static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0){ h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);}#endif#ifdef JZ4740_MXU_OPTstatic void h264_v_loop_filter_chroma_intra_mxu(uint8_t *pix, int xstride, int alpha, int beta){ int d; uint8_t *tpix; S32I2M (xr14, beta); S32SFL (xr0, xr14, xr14, xr14, ptn0); S32SFL (xr0, xr14, xr14, xr14, ptn3); // xr14: beta S32I2M (xr13, alpha); S32SFL (xr0, xr13, xr13, xr13, ptn0); S32SFL (xr0, xr13, xr13, xr13, ptn3); // xr13: alpha S32I2M (xr8, 0x01010101); for (d = 0; d < 2; d++) { tpix = pix - 2*xstride; S32LDD (xr11, tpix, 0); // p1 S32LDIV(xr10, tpix, xstride, 0); // p0 S32LDIV(xr9, tpix, xstride, 0); // q0 S32LDIV(xr1, tpix, xstride, 0); // q1 Q8ABD(xr3, xr10, xr9 ); // FFABS (p0 - q0) Q8ABD(xr4, xr11, xr10); // FFABS (p1 - p0) Q8ABD(xr5, xr1, xr9 ); // FFABS (q1 - q0)// FFABS(p0 - q0) - alpha, FFABS(p1 - p0) - beta Q8ADDE_SS (xr6, xr3, xr13, xr7); // FFABS(p0 - q0) - alpha Q8ADDE_SS (xr3, xr4, xr14, xr4); // FFABS(p1 - p0) - beta Q16SLR (xr6, xr6, xr7, xr7, 15); // 1: < 0 (FFABS( p0 - q0 ) < alpha) Q16SLR (xr3, xr3, xr4, xr4, 15); // 1: < 0 (FFABS( p1 - p0 ) < beta) Q16SAT (xr4, xr3, xr4); // xr4: 1: < 0 (FFABS( p1 - p0 ) < beta) Q16SAT (xr3, xr6, xr7); // xr3: 1: < 0 (FFABS( p0 - q0 ) < alpha)// FFABS(q1 - q0) - beta Q8ADDE_SS (xr6, xr5, xr14, xr7); // FFABS(q1 - q0) - beta Q16SLR (xr6, xr6, xr7, xr7, 15); // 1: < 0 (FFABS( q1 - q0 ) < beta) Q16SAT (xr5, xr6, xr7); // xr5: 1: < 0 (FFABS( q1 - q0 ) < beta)// if( FFABS( p0 - q0 ) < alpha && ..) Q8MADL_AA (xr0, xr3, xr4, xr3); // if( FFABS( p0 - q0 ) < alpha && ..)// Q8ADDE_AA (xr12, xr11, xr8, xr7); // (xr12, xr7): p1 + 1 Q16SLL (xr12, xr12, xr7, xr7, 1); // (xr12, xr7): 2*p1 + 2 Q8ACCE_AA (xr12, xr10, xr1, xr7); // (xr12, xr7): 2*p1 + p0 + q1 + 2// Q8MADL_AA (xr0, xr3, xr5, xr3); // if( FFABS( p0 - q0 ) < alpha && ... && ...)// Q8ADDE_AA (xr2, xr1, xr8, xr6); // (xr2, xr6): q1 + 1 Q16SLL (xr2, xr2, xr6, xr6, 1); // (xr2, xr6): 2*q1 + 2 Q8ACCE_AA (xr2, xr9, xr11, xr6); // (xr2, xr6): 2*q1 + q0 + p1 + 2// Q16SAR (xr12, xr12, xr7, xr7, 2); // (xr12, xr7): (2*p1 + p0 + q1 + 2) >> 2, new p0 S32SFL (xr0, xr12, xr7, xr12, ptn1); // xr12: new p0 Q8MADL_AA (xr0, xr3, xr12, xr12); // xr12: new p0 mask Q16SAR (xr2, xr2, xr6, xr6, 2); // (xr2, xr6): (2*q1 + q0 + p1 + 2) >> 2, new q0 S32SFL (xr0, xr2, xr6, xr2, ptn1); // xr12: new q0 Q8MADL_AA (xr0, xr3, xr2, xr2); // xr2: new q0 mask// Q8ADD_SS (xr4, xr8, xr3); // xr4: ~(xr3) Q8MADL_AA (xr0, xr4, xr10, xr10); // xr10: old p0 mask Q8MADL_AA (xr0, xr4, xr9, xr9); // xr9: old q0 mask// store tpix = pix - xstride; Q8ADD_AA (xr10, xr10, xr12); // xr10: final p0' S32STD (xr10, tpix, 0); // pix[-xstride] = ...p0' Q8ADD_AA (xr9, xr9, xr2); // xr9: final q0' S32STDV (xr9, tpix, xstride, 0); // pix[0] = ...q0' pix += 4; }}static void h264_h_loop_filter_chroma_intra_mxu(uint8_t *pix, int ystride, int alpha, int beta){ int d; uint8_t *tpix; unsigned int p0, q0; S32I2M (xr14, beta); S32SFL (xr0, xr14, xr14, xr14, ptn0); S32SFL (xr0, xr14, xr14, xr14, ptn3); // xr14: beta S32I2M (xr13, alpha); S32SFL (xr0, xr13, xr13, xr13, ptn0); S32SFL (xr0, xr13, xr13, xr13, ptn3); // xr13: alpha S32I2M (xr8, 0x01010101); for( d = 0; d < 2; d++ ) { tpix = pix; S32LDD (xr2, tpix, 0); // xr2: q3,q2,q1,q0 S32LDD (xr1, tpix, -4); // xr1: p0,p1,p2,p3 S32LDIV (xr4, tpix, ystride, 0); // xr4: q3',q2',q1',q0' S32LDD (xr3, tpix, -4); // xr3: p0',p1',p2',p3'// S32SFL (xr6, xr4, xr2, xr12, ptn0); // xr6: q3'q3q2'q2, xr12:q1'q1q0'q0 S32SFL (xr5, xr3, xr1, xr7, ptn0); // xr5: p0'p0p1'p1, xr7:p2'p2p3'p3// S32LDIV (xr2, tpix, ystride, 0); // xr2: q3",q2",q1",q0" S32LDD (xr1, tpix, -4); // xr1: p0",p1",p2",p3" S32LDIV (xr4, tpix, ystride, 0); // xr4: q3^,q2^,q1^,q0^ S32LDD (xr3, tpix, -4); // xr3: p0^,p1^,p2^,p3^// S32SFL (xr4, xr4, xr2, xr2, ptn0); // xr4: q3^q3"q2^q2", xr2:q1^q1"q0^q0" S32SFL (xr3, xr3, xr1, xr1, ptn0); // xr3: p0^p0"p1^p1", xr1:p2^p2"p3^p3"// S32SFL (xr10, xr3, xr5, xr11, ptn3); // xr10: p0^p0"p0'p0, xr11:p1^p1"p1'p1 S32SFL (xr1, xr2, xr12, xr9, ptn3); // xr1: q1^q1"q1'q1, xr9:q0^q0"q0'q0// Q8ABD(xr3, xr10, xr9 ); // FFABS (p0 - q0) Q8ABD(xr4, xr11, xr10); // FFABS (p1 - p0) Q8ABD(xr5, xr1, xr9 ); // FFABS (q1 - q0)// FFABS(p0 - q0) - alpha, FFABS(p1 - p0) - beta Q8ADDE_SS (xr6, xr3, xr13, xr7); // FFABS(p0 - q0) - alpha Q8ADDE_SS (xr3, xr4, xr14, xr4); // FFABS(p1 - p0) - beta Q16SLR (xr6, xr6, xr7, xr7, 15); // 1: < 0 (FFABS( p0 - q0 ) < alpha) Q16SLR (xr3, xr3, xr4, xr4, 15); // 1: < 0 (FFABS( p1 - p0 ) < beta) Q16SAT (xr4, xr3, xr4); // xr4: 1: < 0 (FFABS( p1 - p0 ) < beta) Q16SAT (xr3, xr6, xr7); // xr3: 1: < 0 (FFABS( p0 - q0 ) < alpha)// FFABS(q1 - q0) - beta Q8ADDE_SS (xr6, xr5, xr14, xr7); // FFABS(q1 - q0) - beta Q16SLR (xr6, xr6, xr7, xr7, 15); // 1: < 0 (FFABS( q1 - q0 ) < beta) Q16SAT (xr5, xr6, xr7); // xr5: 1: < 0 (FFABS( q1 - q0 ) < beta)// if( FFABS( p0 - q0 ) < alpha && ..) Q8MADL_AA (xr0, xr3, xr4, xr3); // if( FFABS( p0 - q0 ) < alpha && ..)// Q8ADDE_AA (xr12, xr11, xr8, xr7); // (xr12, xr7): p1 + 1 Q16SLL (xr12, xr12, xr7, xr7, 1); // (xr12, xr7): 2*p1 + 2 Q8ACCE_AA (xr12, xr10, xr1, xr7); // (xr12, xr7): 2*p1 + p0 + q1 + 2// Q8MADL_AA (xr0, xr3, xr5, xr3); // if( FFABS( p0 - q0 ) < alpha && ... && ...)// Q8ADDE_AA (xr2, xr1, xr8, xr6); // (xr2, xr6): q1 + 1 Q16SLL (xr2, xr2, xr6, xr6, 1); // (xr2, xr6): 2*q1 + 2 Q8ACCE_AA (xr2, xr9, xr11, xr6); // (xr2, xr6): 2*q1 + q0 + p1 + 2// Q16SAR (xr12, xr12, xr7, xr7, 2); // (xr12, xr7): (2*p1 + p0 + q1 + 2) >> 2, new p0 S32SFL (xr0, xr12, xr7, xr12, ptn1); // xr12: new p0 Q8MADL_AA (xr0, xr3, xr12, xr12); // xr12: new p0 mask Q16SAR (xr2, xr2, xr6, xr6, 2); // (xr2, xr6): (2*q1 + q0 + p1 + 2) >> 2, new q0 S32SFL (xr0, xr2, xr6, xr2, ptn1); // xr12: new q0 Q8MADL_AA (xr0, xr3, xr2, xr2); // xr2: new q0 mask// Q8ADD_SS (xr4, xr8, xr3); // xr4: ~(xr3) Q8MADL_AA (xr0, xr4, xr10, xr10); // xr10: old p0 mask Q8MADL_AA (xr0, xr4, xr9, xr9); // xr9: old q0 mask// Q8ADD_AA (xr10, xr10, xr12); // xr10: final p0' Q8ADD_AA (xr9, xr9, xr2); // xr9: final q0'// p0 = S32M2I (xr10); q0 = S32M2I (xr9);// pix[-1] = p0; pix[0] = q0; pix += ystride; pix[-1] = (p0 >> 8); pix[0] = (q0 >> 8); pix += ystride; pix[-1] = (p0 >> 16); pix[0] = (q0 >> 16); pix += ystride; pix[-1] = (p0 >> 24); pix[0] = (q0 >> 24); pix += ystride; }}#elsestatic inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta){ int d; for( d = 0; d < 8; d++ ) { const int p0 = pix[-1*xstride]; const int p1 = pix[-2*xstride]; const int q0 = pix[0]; const int q1 = pix[1*xstride]; if( FFABS( p0 - q0 ) < alpha && FFABS( p1 - p0 ) < beta && FFABS( q1 - q0 ) < beta ) { pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ } pix += ystride; }}static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta){ h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);}static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta){ h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);}#endif#if 1#define H264_LOWPASS(OPNAME, OP, OP2) \static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ const int h=2;\ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ int i;\ for(i=0; i<h; i++)\ {\ OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ dst+=dstStride;\ src+=srcStride;\ }\}\\static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ const int w=2;\ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ int i;\ for(i=0; i<w; i++)\ {\ const int srcB= src[-2*srcStride];\ const int srcA= src[-1*srcStride];\ const int src0= src[0 *srcStride];\ const int src1= src[1 *srcStride];\ const int src2= src[2 *srcStride];\ const int src3= src[3 *srcStride];\ const int src4= src[4 *srcStride];\ OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ dst++;\ src++;\ }\}\\static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ const int h=2;\ const int w=2;\ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ int i;\ src -= 2*srcStride;\ for(i=0; i<h+5; i++)\ {\ tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ tmp+=tmpStride;\ src+=srcStride;\ }\ tmp -= tmpStride*(h+5-2);\ for(i=0; i<w; i++)\ {\ const int tmpB= tmp[-2*tmpStride];\ const int tmpA= tmp[-1*tmpStride];\ const int tmp0= tmp[0 *tmpStride];\ const int tmp1= tmp[1 *tmpStride];\ const int tmp2= tmp[2 *tmpStride];\ const int tmp3= tmp[3 *tmpStride];\ const int tmp4= tmp[4 *tmpStride];\ OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ dst++;\ tmp++;\ }\}\static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -