📄 h264dsputil.c
字号:
S32SFL (xr0, xr15, xr15, xr15, ptn3); // xr15: tc0[i]// tpix = pix; S32LDD (xr2, tpix, 0); // xr2: q3,q2,q1,q0 S32LDD (xr1, tpix, -4); // xr1: p0,p1,p2,p3 S32LDIV (xr4, tpix, stride, 0); // xr4: q3',q2',q1',q0' S32LDD (xr3, tpix, -4); // xr3: p0',p1',p2',p3'// S32SFL (xr6, xr4, xr2, xr8, ptn0); // xr6: q3'q3q2'q2, xr8:q1'q1q0'q0 S32SFL (xr5, xr3, xr1, xr7, ptn0); // xr5: p0'p0p1'p1, xr7:p2'p2p3'p3// S32LDIV (xr2, tpix, stride, 0); // xr2: q3",q2",q1",q0" S32LDD (xr1, tpix, -4); // xr1: p0",p1",p2",p3" S32LDIV (xr4, tpix, stride, 0); // xr4: q3^,q2^,q1^,q0^ S32LDD (xr3, tpix, -4); // xr3: p0^,p1^,p2^,p3^// S32SFL (xr4, xr4, xr2, xr2, ptn0); // xr4: q3^q3"q2^q2", xr2:q1^q1"q0^q0" S32SFL (xr3, xr3, xr1, xr1, ptn0); // xr3: p0^p0"p1^p1", xr1:p2^p2"p3^p3"// S32SFL (xr10, xr3, xr5, xr11, ptn3); // xr10: p0^p0"p0'p0, xr11:p1^p1"p1'p1 S32SFL (xr12, xr1, xr7, xr0, ptn3); // xr12: p2^p2"p2'p2 S32SFL (xr1, xr2, xr8, xr9, ptn3); // xr1: q1^q1"q1'q1, xr9:q0^q0"q0'q0 S32SFL (xr0, xr4, xr6, xr2, ptn3); // xr2: q2^q2"q2'q2// Q8ABD(xr3, xr10, xr9 ); // FFABS (p0 - q0) Q8ABD(xr4, xr11, xr10); // FFABS (p1 - p0) Q8ABD(xr5, xr1, xr9 ); // FFABS (q1 - q0)// FFABS(p0 - q0) - alpha, FFABS(p1 - p0) - beta Q8ADDE_SS (xr6, xr3, xr13, xr7); // FFABS(p0 - q0) - alpha Q8ADDE_SS (xr3, xr4, xr14, xr4); // FFABS(p1 - p0) - beta Q16SLR (xr6, xr6, xr7, xr7, 15); // 1: < 0 (FFABS( p0 - q0 ) < alpha) Q16SLR (xr3, xr3, xr4, xr4, 15); // 1: < 0 (FFABS( p1 - p0 ) < beta) Q16SAT (xr4, xr3, xr4); // xr4: 1: < 0 (FFABS( p1 - p0 ) < beta) Q16SAT (xr3, xr6, xr7); // xr3: 1: < 0 (FFABS( p0 - q0 ) < alpha)// FFABS(q1 - q0) - beta Q8ADDE_SS (xr6, xr5, xr14, xr7); // FFABS(q1 - q0) - beta Q16SLR (xr6, xr6, xr7, xr7, 15); // 1: < 0 (FFABS( q1 - q0 ) < beta) Q16SAT (xr5, xr6, xr7); // xr5: 1: < 0 (FFABS( q1 - q0 ) < beta)// if( FFABS( p0 - q0 ) < alpha && ..) Q8MADL_AA (xr0, xr3, xr4, xr3); Q8ABD(xr6, xr12, xr10 ); // FFABS (p2 - p0) Q8ABD(xr7, xr2, xr9 ); // FFABS (q2 - q0) Q8MADL_AA (xr0, xr3, xr5, xr3); // xr3: (FFABS( p0 - q0 ) < alpha // && FFABS( p1 - p0 ) < beta && ...) Q8ADDE_SS (xr4, xr6, xr14, xr5); // FFABS(p2 - p0) - beta Q8ADDE_SS (xr6, xr7, xr14, xr7); // FFABS(q2 - q0) - beta Q8MADL_AA (xr0, xr3, xr15, xr15); // xr15: new clip value Q16SLR (xr4, xr4, xr5, xr5, 15); // 1: < 0 FFABS(p2 - p0) - beta Q16SLR (xr6, xr6, xr7, xr7, 15); // 1: < 0 FFABS(q2 - q0) - beta Q16SAT (xr4, xr4, xr5); // xr4: 1: < 0 (FFABS( p2 - p0 ) < beta) Q16SAT (xr5, xr6, xr7); // xr5: 1: < 0 (FFABS( q2 - q0 ) < beta)// calculate clip value Q8ADD_AA (xr6, xr4, xr5); // pre-calculate for tc++, tc++ Q8MADL_AA (xr0, xr6, xr3, xr6); // xr6: new clip differ Q8MADL_AA (xr0, xr4, xr15, xr4); // xr4: new clip value for p1 Q8MADL_AA (xr0, xr5, xr15, xr5); // xr5: new clip value for q1 Q8ADD_AA (xr3, xr6, xr15); // xr3: new clip value for p0,q0// Q8AVGR (xr6, xr9, xr10); // (p0 + q0 + 1) >> 1 Q8AVG (xr12, xr6, xr12); // (p2 + (p0 + q0 + 1) >> 1) >> 1 Q8AVG (xr2, xr6, xr2); // (q2 + (p0 + q0 + 1) >> 1) >> 1 Q8ADDE_SS (xr12, xr12, xr11, xr7); // (p2 + (p0 + q0 + 1) >> 1) >> 1 - p1 Q8ADDE_SS (xr2, xr2, xr1, xr8); // (q2 + (p0 + q0 + 1) >> 1) >> 1 - q1 Q8ADDE_SS (xr6, xr0, xr4, xr15); //-tc//av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] ); D16MAX (xr12, xr12, xr6); D16MAX (xr7, xr7, xr15); Q8ADDE_AA (xr6, xr0, xr4, xr15); //tc D16MIN (xr12, xr12, xr6); D16MIN (xr7, xr7, xr15); Q8ADDE_SS (xr6, xr0, xr5, xr15); //-tc S32SFL (xr0, xr12, xr7, xr12, ptn1);//av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] ); D16MAX (xr2, xr2, xr6); D16MAX (xr8, xr8, xr15); Q8ADDE_AA (xr6, xr0, xr5, xr15); //tc D16MIN (xr2, xr2, xr6); D16MIN (xr8, xr8, xr15);// p1 + ...; q1 + ... Q8ADD_AA (xr4, xr12, xr11); // p1 + ... //final p1 S32SFL (xr0, xr2, xr8, xr2, ptn1); Q8ADD_AA (xr5, xr2, xr1); // q1 + ... //final q1// (q0 - p0 ) << 2 + (p1 - q1) Q8ADDE_SS (xr2, xr9, xr10, xr12); // q0 - p0 Q16SLL (xr2, xr2, xr12, xr12, 2); // (q0 - p0) << 2 Q8ACCE_SS (xr2, xr11, xr1, xr12); // (q0 - p0) << 2 + (p1 - q1)// -tc, tc Q8ADDE_AA (xr11, xr0, xr3, xr15); //+tc (xr11, xr15) Q8ADDE_SS (xr1, xr0, xr3, xr6); //-tc (xr1, xr6)// i_delta = av_clip (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3 ....) Q16SAR (xr2, xr2, xr12, xr12, 2); // ((q0 - p0) << 2 + (p1 - q1) + 0) >> 2 D16AVGR(xr2, xr2, xr0); D16AVGR(xr12, xr12, xr0); // ((q0 - p0) << 2 + (p1 - q1) + 1) >> 1 D16MAX (xr2, xr2, xr1); D16MAX (xr12, xr12, xr6); D16MIN (xr2, xr2, xr11); D16MIN (xr12, xr12, xr15); Q16ADD_SS_WW (xr1, xr0, xr2, xr0); // xr1 = -xr2 Q16ADD_SS_WW (xr11, xr0, xr12, xr0); // xr11 = -xr12// Q8ACCE_AA (xr2, xr0, xr10, xr12); // (p0 + i_delta) Q8ACCE_AA (xr1, xr0, xr9, xr11); // (q0 - i_delta) Q16SAT (xr10, xr2, xr12); // final p0 Q16SAT (xr9, xr1, xr11); // final q0// store S32SFL (xr2, xr10, xr4, xr1, ptn0); // xr2: p0^p1^p0"p1", xr1:p0'p1'p0p1 S32SFL (xr4, xr5, xr9, xr3, ptn0); // xr4: q1^q0^q1"q0", xr3:q1'q0'q1q0 t1 = S32M2I(xr1); t2 = S32M2I(xr2); t3 = S32M2I(xr3); t4 = S32M2I(xr4); *((unsigned short *)pix - 1) = t1; *((unsigned short *)pix + 0) = t3; pix += stride; *((unsigned short *)pix - 1) = (t1 >> 16); *((unsigned short *)pix + 0) = (t3 >> 16); pix += stride; *((unsigned short *)pix - 1) = t2; *((unsigned short *)pix + 0) = t4; pix += stride; *((unsigned short *)pix - 1) = (t2 >> 16); *((unsigned short *)pix + 0) = (t4 >> 16); pix += stride; }}#elsestatic inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0){ int i, d; for( i = 0; i < 4; i++ ) { if( tc0[i] < 0 ) { pix += 4*ystride; continue; } for( d = 0; d < 4; d++ ) { const int p0 = pix[-1*xstride]; const int p1 = pix[-2*xstride]; const int p2 = pix[-3*xstride]; const int q0 = pix[0]; const int q1 = pix[1*xstride]; const int q2 = pix[2*xstride]; if( FFABS( p0 - q0 ) < alpha && FFABS( p1 - p0 ) < beta && FFABS( q1 - q0 ) < beta ) { int tc = tc0[i]; int i_delta; if( FFABS( p2 - p0 ) < beta ) { pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] ); tc++; } if( FFABS( q2 - q0 ) < beta ) { pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] ); tc++; } i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); pix[-xstride] = av_clip_uint8( p0 + i_delta ); /* p0' */ pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ } pix += ystride; } }}static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0){ h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);}static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0){ h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);}#endif#ifdef JZ4740_MXU_OPTstatic void h264_v_loop_filter_chroma_mxu(uint8_t *pix, int xstride, int alpha, int beta, int8_t *tc0){ uint8_t *tpix; int i, d; S32I2M (xr14, beta); S32SFL (xr0, xr14, xr14, xr14, ptn0); S32SFL (xr0, xr14, xr14, xr14, ptn3); // xr14: beta S32I2M (xr13, alpha); S32SFL (xr0, xr13, xr13, xr13, ptn0); S32SFL (xr0, xr13, xr13, xr13, ptn3); // xr13: alpha S32I2M (xr8, 0x01010101); for(i = 0; i < 2; i++) { const int tc = *((short *)tc0 + i);// tpix = pix - 2*xstride; S32LDD (xr11, tpix, 0); // p1 S32LDIV(xr10, tpix, xstride, 0); // p0 S32LDIV(xr9, tpix, xstride, 0); // q0 S32LDIV(xr1, tpix, xstride, 0); // q1 Q8ABD(xr3, xr10, xr9 ); // FFABS (p0 - q0) Q8ABD(xr4, xr11, xr10); // FFABS (p1 - p0) Q8ABD(xr5, xr1, xr9 ); // FFABS (q1 - q0)// FFABS(p0 - q0) - alpha, FFABS(p1 - p0) - beta Q8ADDE_SS (xr6, xr3, xr13, xr7); // FFABS(p0 - q0) - alpha Q8ADDE_SS (xr3, xr4, xr14, xr4); // FFABS(p1 - p0) - beta Q16SLR (xr6, xr6, xr7, xr7, 15); // 1: < 0 (FFABS( p0 - q0 ) < alpha) Q16SLR (xr3, xr3, xr4, xr4, 15); // 1: < 0 (FFABS( p1 - p0 ) < beta) Q16SAT (xr4, xr3, xr4); // xr4: 1: < 0 (FFABS( p1 - p0 ) < beta Q16SAT (xr3, xr6, xr7); // xr3: 1: < 0 (FFABS( p0 - q0 ) < alpha)// FFABS(q1 - q0) - beta Q8ADDE_SS (xr6, xr5, xr14, xr7); // FFABS(q1 - q0) - beta Q16SLR (xr6, xr6, xr7, xr7, 15); // 1: < 0 (FFABS( q1 - q0 ) < beta) Q16SAT (xr5, xr6, xr7); // xr5: 1: < 0 (FFABS( q1 - q0 ) < beta)// if( FFABS( p0 - q0 ) < alpha && ..) Q8MADL_AA (xr0, xr3, xr4, xr3);// xr15 = tc S32I2M (xr2, tc); S32SFL (xr0, xr2, xr2, xr15, ptn0);// Q8ADDE_SS (xr2, xr9, xr10, xr12); // q0 - p0 Q8ACCE_AA (xr2, xr0, xr8, xr12); // (q0 - p0 + 1) Q8MADL_AA (xr0, xr3, xr5, xr3); // xr3: (FFABS( p0 - q0 ) < alpha // && FFABS( p1 - p0 ) < beta && ...) Q8MAX (xr15, xr0, xr15); Q16SLL (xr2, xr2, xr12, xr12, 2); // (q0 - p0) << 2 + 4 Q8MADL_AA (xr0, xr3, xr15, xr15); // xr15: (FFABS( p0 - q0 ) < alpha .... ) * tc Q8ACCE_SS (xr2, xr11, xr1, xr12); // (q0 - p0) << 2 + (p1 - q1) + 4// -tc, tc Q8ADDE_AA (xr7, xr0, xr15, xr6); // tc1, tc0 Q8ADDE_SS (xr5, xr0, xr15, xr4); // -tc1, -tc0 Q16SAR (xr2, xr2, xr12, xr12, 3); D16MAX (xr2, xr2, xr5); D16MAX (xr12, xr12, xr4); D16MIN (xr2, xr2, xr7); D16MIN (xr12, xr12, xr6);// p0 + delta, p1 - delta Q16ADD_SS_WW (xr1, xr0, xr2, xr0); // xr1 = -xr2 -delta Q16ADD_SS_WW (xr11, xr0, xr12, xr0); // xr11 = -xr12 -delta Q8ACCE_AA (xr2, xr0, xr10, xr12); // (p0 + i_delta) Q8ACCE_AA (xr1, xr0, xr9, xr11); // (q0 - i_delta) tpix = pix - 2*xstride; Q16SAT (xr2, xr2, xr12); Q16SAT (xr1, xr1, xr11); S32SDIV(xr2, tpix, xstride, 0); //-1, p0 S32SDIV(xr1, tpix, xstride, 0); //0, q0 pix += 4; }}static void h264_h_loop_filter_chroma_mxu(uint8_t *pix, int ystride, int alpha, int beta, int8_t *tc0){ int i, d; uint8_t *tpix; unsigned int p0, q0; S32I2M (xr14, beta); S32SFL (xr0, xr14, xr14, xr14, ptn0); S32SFL (xr0, xr14, xr14, xr14, ptn3); // xr14: beta S32I2M (xr13, alpha); S32SFL (xr0, xr13, xr13, xr13, ptn0); S32SFL (xr0, xr13, xr13, xr13, ptn3); // xr13: alpha S32I2M (xr8, 0x01010101); for(i = 0; i < 2; i++) { const int tc = *((short *)tc0 + i);// tpix = pix; S32LDD (xr2, tpix, 0); // xr2: q3,q2,q1,q0 S32LDD (xr1, tpix, -4); // xr1: p0,p1,p2,p3 S32LDIV (xr4, tpix, ystride, 0); // xr4: q3',q2',q1',q0' S32LDD (xr3, tpix, -4); // xr3: p0',p1',p2',p3'// S32SFL (xr6, xr4, xr2, xr12, ptn0); // xr6: q3'q3q2'q2, xr12:q1'q1q0'q0 S32SFL (xr5, xr3, xr1, xr7, ptn0); // xr5: p0'p0p1'p1, xr7:p2'p2p3'p// S32LDIV (xr2, tpix, ystride, 0); // xr2: q3",q2",q1",q0" S32LDD (xr1, tpix, -4); // xr1: p0",p1",p2",p3" S32LDIV (xr4, tpix, ystride, 0); // xr4: q3^,q2^,q1^,q0^ S32LDD (xr3, tpix, -4); // xr3: p0^,p1^,p2^,p3^// S32SFL (xr4, xr4, xr2, xr2, ptn0); // xr4: q3^q3"q2^q2", xr2:q1^q1"q0^q0" S32SFL (xr3, xr3, xr1, xr1, ptn0); // xr3: p0^p0"p1^p1", xr1:p2^p2"p3^p3"// S32SFL (xr10, xr3, xr5, xr11, ptn3); // xr10: p0^p0"p0'p0, xr11:p1^p1"p1'p1 S32SFL (xr1, xr2, xr12, xr9, ptn3); // xr1: q1^q1"q1'q1, xr9:q0^q0"q0'q0// Q8ABD(xr3, xr10, xr9 ); // FFABS (p0 - q0) Q8ABD(xr4, xr11, xr10); // FFABS (p1 - p0) Q8ABD(xr5, xr1, xr9 ); // FFABS (q1 - q0)// FFABS(p0 - q0) - alpha, FFABS(p1 - p0) - beta Q8ADDE_SS (xr6, xr3, xr13, xr7); // FFABS(p0 - q0) - alpha Q8ADDE_SS (xr3, xr4, xr14, xr4); // FFABS(p1 - p0) - beta Q16SLR (xr6, xr6, xr7, xr7, 15); // 1: < 0 (FFABS( p0 - q0 ) < alpha) Q16SLR (xr3, xr3, xr4, xr4, 15); // 1: < 0 (FFABS( p1 - p0 ) < beta) Q16SAT (xr4, xr3, xr4); // xr4: 1: < 0 (FFABS( p1 - p0 ) < beta Q16SAT (xr3, xr6, xr7); // xr3: 1: < 0 (FFABS( p0 - q0 ) < alpha)// FFABS(q1 - q0) - beta Q8ADDE_SS (xr6, xr5, xr14, xr7); // FFABS(q1 - q0) - beta Q16SLR (xr6, xr6, xr7, xr7, 15); // 1: < 0 (FFABS( q1 - q0 ) < beta) Q16SAT (xr5, xr6, xr7); // xr5: 1: < 0 (FFABS( q1 - q0 ) < beta)// if( FFABS( p0 - q0 ) < alpha && ..) Q8MADL_AA (xr0, xr3, xr4, xr3);// xr15 = tc S32I2M (xr2, tc); S32SFL (xr0, xr2, xr2, xr15, ptn0);// Q8ADDE_SS (xr2, xr9, xr10, xr12); // q0 - p0 Q8ACCE_AA (xr2, xr0, xr8, xr12); // (q0 - p0 + 1) Q8MADL_AA (xr0, xr3, xr5, xr3); // xr3: (FFABS( p0 - q0 ) < alpha // && FFABS( p1 - p0 ) < beta && ...) Q8MAX (xr15, xr0, xr15); Q16SLL (xr2, xr2, xr12, xr12, 2); // (q0 - p0) << 2 + 4 Q8MADL_AA (xr0, xr3, xr15, xr15); // xr15: (FFABS( p0 - q0 ) < alpha .... ) * tc Q8ACCE_SS (xr2, xr11, xr1, xr12); // (q0 - p0) << 2 + (p1 - q1) + 4// -tc, tc Q8ADDE_AA (xr7, xr0, xr15, xr6); // tc1, tc0 Q8ADDE_SS (xr5, xr0, xr15, xr4); // -tc1, -tc0 Q16SAR (xr2, xr2, xr12, xr12, 3); D16MAX (xr2, xr2, xr5); D16MAX (xr12, xr12, xr4); D16MIN (xr2, xr2, xr7); D16MIN (xr12, xr12, xr6);// p0 + delta, p1 - delta
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -