📄 quant_non_intra.c
字号:
/* 0x01080010 = ((1<<24) & 0x1F000000)|((8<<16) & 0x00FF0000)|(16 & 0xFFFF) */ ps = src; pd = dst;#ifdef ALTIVEC_DST vec_dst(inter_q_mat, 0x01080010, 0); vec_dst(ps, 0x01040010, 1); vec_dstst(pd, 0x01040010, 2);#endif vu.s.mquant = mquant; vu.s.clipvalue = dctsatlim; zero = vec_splat_u16(0); vu32(one) = vec_splat_u32(1); one = vec_ctf(vu32(one), 0); nzblockbits = 0; N = BLOCK_COUNT; i = N; last_block = 0; /* counting down from i */ recalc_blocks = 0; do {recalc: pqm = inter_q_mat; vu16(nz) = vec_splat_u16(0); max = vec_splat_u16(0); j = 4; do {#ifdef ALTIVEC_DST vec_dst(ps, 0x01040010, 1); vec_dstst(pd, 0x01040010, 2);#endif QUANT_NON_INTRA_AB; max = vec_max(max, vu16(dstA)); max = vec_max(max, vu16(dstB)); SIGN_AND_STORE; } while (--j); /* check for clipping/saturation {{{ */ vuv = vec_ld(0, (unsigned short*)&vu); vuv = vec_splat(vuv, 1); /* splat clipvalue */ if (vec_any_gt(max, vuv)) { int next_mquant = next_larger_quant(q_scale_type, mquant); if (next_mquant == mquant) { /* saturation has occured, clip values then * goto saturated jump point. */ pd -= 8*8; /* reset pointer to beginning of block */ j = 4; do { srcA = vec_ld(0, pd); pd += 8; srcB = vec_ld(0, pd); /* (dstA,dstB) = abs(srcA,srcB) {{{ */ vs16(t1) = vec_subs(vs16(zero), srcA); vs16(t2) = vec_subs(vs16(zero), srcB); dstA = vec_max(srcA, vs16(t1)); dstB = vec_max(srcB, vs16(t2)); /* }}} */ /* (dstA,dstB) = clip(dstA,dstB, vuv) {{{ */ vu16(dstA) = vec_min(vu16(dstA), vuv); vu16(dstB) = vec_min(vu16(dstB), vuv); /* }}} */ /* restore sign {{{ */ selA = vec_cmpgt(vs16(zero), srcA); selB = vec_cmpgt(vs16(zero), srcB); vs16(t1) = vec_subs(vs16(zero), dstA); vs16(t2) = vec_subs(vs16(zero), dstB); dstA = vec_sel(dstA, vs16(t1), selA); dstB = vec_sel(dstB, vs16(t2), selB); /* }}} */ pd -= 8; vec_st(dstA, 0, pd); pd += 8; vec_st(dstB, 0, pd); pd += 8; } while (--j); goto saturated; } /* load new (int)mquant into (short)vu.s.mquant */ mquant = next_mquant; vu.s.mquant = next_mquant; nzblockbits = 0; recalc_blocks = N - i; /* reset pointers to beginning of block */ ps -= 8*8; pd -= 8*8; goto recalc; } /* }}} */ i--; UPDATE_NZBLOCKBITS; } while (i); /* recalculate blocks if necessary. this branch of code does not * need to worry about saturation or clipping. */ if (recalc_blocks > 0) { i = N; last_block = N - recalc_blocks; ps = src; pd = dst; do { pqm = inter_q_mat; vu16(nz) = vec_splat_u16(0); j = 4; do {#ifdef ALTIVEC_DST vec_dst(ps, 0x01040010, 1); vec_dstst(pd, 0x01040010, 2);#endif QUANT_NON_INTRA_AB; SIGN_AND_STORE; } while (--j); i--; UPDATE_NZBLOCKBITS; } while (i > last_block); } goto done; /* * the following code is entered at the label 'saturated'. * this branch of code clips the destination values. */ do { i = N; last_block = N - recalc_blocks; ps = src; pd = dst; recalc_blocks = 0; /* no more blocks after next loop */ do { pqm = inter_q_mat; vu16(nz) = vec_splat_u16(0); j = 4; do {#ifdef ALTIVEC_DST vec_dst(ps, 0x01040010, 1); vec_dstst(pd, 0x01040010, 2);#endif QUANT_NON_INTRA_AB; /* clip {{{ */ vuv = vec_ld(0, (unsigned short*)&vu); vuv = vec_splat(vuv, 1); /* splat clipvalue */ vu16(dstA) = vec_min(vu16(dstA), vuv); vu16(dstB) = vec_min(vu16(dstB), vuv); /* }}} */ SIGN_AND_STORE; } while (--j);saturated: i--; UPDATE_NZBLOCKBITS; } while (i > last_block); } while (recalc_blocks > 0);done:#ifdef ALTIVEC_DST vec_dssall();#endif *nonsat_mquant = mquant; AMBER_STOP; return nzblockbits;}#if ALTIVEC_TEST_FUNCTION(quant_non_intra) /* {{{ */# ifdef ALTIVEC_VERIFYint quant_non_intra_altivec_verify(QUANT_NON_INTRA_PDECL){ int i, len, nzb1, nzb2, nsmq, nsmq1, nsmq2; unsigned long checksum1, checksum2; int16_t *dstcpy; uint16_t *inter_q = (uint16_t*)wsp->inter_q_mat; len = 64 * BLOCK_COUNT; dstcpy = (int16_t*)malloc(len*sizeof(int16_t)); if (dstcpy == NULL) mjpeg_error_exit1("quant_non_intra_verify: unable to malloc"); /* save nonsat_mquant */ nsmq = *nonsat_mquant; nzb1 = quant_non_intra_altivec(QUANT_NON_INTRA_ARGS); nsmq1 = *nonsat_mquant; for (checksum1 = i = 0; i < len; i++) checksum1 += abs(dst[i]); memcpy(dstcpy, dst, len*sizeof(int16_t)); /* restore nonsat_mquant */ *nonsat_mquant = nsmq; nzb2 = ALTIVEC_TEST_WITH(quant_non_intra)(QUANT_NON_INTRA_ARGS); nsmq2 = *nonsat_mquant; for (checksum2 = i = 0; i < len; i++) checksum2 += abs(dst[i]); if (nzb1 != nzb2 || checksum1 != checksum2 || nsmq1 != nsmq2) { mjpeg_debug("quant_non_intra(" QUANT_NON_INTRA_PFMT ")", QUANT_NON_INTRA_ARGS); mjpeg_debug("quant_non_intra: results differ " "{nzb=%d, checksum=%d, mquant=%d} != " "{nzb=%d, checksum=%d, mquant=%d}", nzb1, checksum1, nsmq1, nzb2, checksum2, nsmq2); } for (i = 0; i < len; i++) { if (dstcpy[i] != dst[i]) { mjpeg_debug("quant_non_intra: src[%d]=%d, qmat=%d, " "dst %d != %d", i, src[i], inter_q[i&63], dstcpy[i], dst[i]); } } free(dstcpy); return nzb2;}# elseALTIVEC_TEST(quant_non_intra, int, (QUANT_NON_INTRA_PDECL), QUANT_NON_INTRA_PFMT, QUANT_NON_INTRA_ARGS);# endif#endif /* }}} *//* vim:set foldmethod=marker foldlevel=0: */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -