📄 idct.c
字号:
vector float x0, x1, x2, x3, x4, x5, x6, x7, x8;#ifdef ALTIVEC_VERIFY if (NOT_VECTOR_ALIGNED(block)) mjpeg_error_exit1("idct: block %% 16 != 0, (%d)\n", block);#endif AMBER_START; /* 8x8 matrix transpose (vector short[8]) {{{ */#define MERGE_S16(hl,a,b) vec_merge##hl(vs16(a), vs16(b)) bp = (vector signed short*)block; vs16(x0) = vec_ld(0, bp); vs16(x4) = vec_ld(16*4, bp); vs16(b00) = MERGE_S16(h, x0, x4); vs16(b10) = MERGE_S16(l, x0, x4); bp++; vs16(x1) = vec_ld(0, bp); vs16(x5) = vec_ld(16*4, bp); vs16(b20) = MERGE_S16(h, x1, x5); vs16(b30) = MERGE_S16(l, x1, x5); bp++; vs16(x2) = vec_ld(0, bp); vs16(x6) = vec_ld(16*4, bp); vs16(b40) = MERGE_S16(h, x2, x6); vs16(b50) = MERGE_S16(l, x2, x6); bp++; vs16(x3) = vec_ld(0, bp); vs16(x7) = vec_ld(16*4, bp); vs16(b60) = MERGE_S16(h, x3, x7); vs16(b70) = MERGE_S16(l, x3, x7); vs16(b01) = MERGE_S16(h, b00, b40); vs16(b11) = MERGE_S16(l, b00, b40); vs16(b21) = MERGE_S16(h, b10, b50); vs16(b31) = MERGE_S16(l, b10, b50); vs16(b41) = MERGE_S16(h, b20, b60); vs16(b51) = MERGE_S16(l, b20, b60); vs16(b61) = MERGE_S16(h, b30, b70); vs16(b71) = MERGE_S16(l, b30, b70); vs16(x0) = MERGE_S16(h, b01, b41); vs16(x1) = MERGE_S16(l, b01, b41); vs16(x2) = MERGE_S16(h, b11, b51); vs16(x3) = MERGE_S16(l, b11, b51); vs16(x4) = MERGE_S16(h, b21, b61); vs16(x5) = MERGE_S16(l, b21, b61); vs16(x6) = MERGE_S16(h, b31, b71); vs16(x7) = MERGE_S16(l, b31, b71);#undef MERGE_S16 /* }}} */ /* convert to float {{{ */#define CTF(n) \ vs32(b##n##0) = vec_unpackh(vs16(x##n)); \ vs32(b##n##1) = vec_unpackl(vs16(x##n)); \ b##n##0 = vec_ctf(vs32(b##n##0), 0); \ b##n##1 = vec_ctf(vs32(b##n##1), 0); \ CTF(0); CTF(1); CTF(2); CTF(3); CTF(4); CTF(5); CTF(6); CTF(7);#undef CTF /* }}} */ /* setup constants {{{ */ /* mzero = -0.0 */ mzero = (vector float)vec_splat_u32(-1); mzero = (vector float)vec_sl(vu32(mzero), vu32(mzero)); cp = idctconsts; cnsts0 = vec_ld(0, cp); cp++; cnsts1 = vec_ld(0, cp); cp++; cnsts2 = vec_ld(0, cp); /* }}} */ IDCTROW(b00, b10, b20, b30, 
b40, b50, b60, b70); IDCTROW(b01, b11, b21, b31, b41, b51, b61, b71); /* 8x8 matrix transpose (vector float[8][2]) {{{ */ x0 = vec_mergel(b00, b20); x1 = vec_mergeh(b00, b20); x2 = vec_mergel(b10, b30); x3 = vec_mergeh(b10, b30); b00 = vec_mergeh(x1, x3); b10 = vec_mergel(x1, x3); b20 = vec_mergeh(x0, x2); b30 = vec_mergel(x0, x2); x4 = vec_mergel(b41, b61); x5 = vec_mergeh(b41, b61); x6 = vec_mergel(b51, b71); x7 = vec_mergeh(b51, b71); b41 = vec_mergeh(x5, x7); b51 = vec_mergel(x5, x7); b61 = vec_mergeh(x4, x6); b71 = vec_mergel(x4, x6); x0 = vec_mergel(b01, b21); x1 = vec_mergeh(b01, b21); x2 = vec_mergel(b11, b31); x3 = vec_mergeh(b11, b31); x4 = vec_mergel(b40, b60); x5 = vec_mergeh(b40, b60); x6 = vec_mergel(b50, b70); x7 = vec_mergeh(b50, b70); b40 = vec_mergeh(x1, x3); b50 = vec_mergel(x1, x3); b60 = vec_mergeh(x0, x2); b70 = vec_mergel(x0, x2); b01 = vec_mergeh(x5, x7); b11 = vec_mergel(x5, x7); b21 = vec_mergeh(x4, x6); b31 = vec_mergel(x4, x6); /* }}} */ /* divide constants by 8 {{{ */ cnst = LD_DIVBY8; cnsts0 = vec_madd(cnsts0, cnst, mzero); cnsts1 = vec_madd(cnsts1, cnst, mzero); /* cnts2 = (cnsts2[0]*DIVBY8, cnsts2[1], cnsts2[2], cnsts[3]) */ x0 = vec_sld(cnsts2, cnsts2, 4); x1 = vec_madd(x0, cnst, mzero); cnsts2 = vec_sld(x1, x0, 12); /* }}} */ IDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70); IDCTCOL(b01, b11, b21, b31, b41, b51, b61, b71); /* round, convert back to short and clip {{{ */ /* cnsts0 = max = 255 = 0x00ff, cnsts2 = min = -256 = 0xff00 {{{ */ vu8(cnsts0) = vec_splat_u8(0); vu8(x8) = vec_splat_u8(-1); vu8(cnsts2) = vec_mergeh(vu8(x8), vu8(cnsts0)); vu8(cnsts0) = vec_mergeh(vu8(cnsts0), vu8(x8)); /* }}} */#define CTS(n) \ b##n##0 = vec_round(b##n##0); \ b##n##1 = vec_round(b##n##1); \ vs32(b##n##0) = vec_cts(b##n##0, 0); \ vs32(b##n##1) = vec_cts(b##n##1, 0); \ vs16(b##n##0) = vec_pack(vs32(b##n##0), vs32(b##n##1)); \ vs16(b##n##0) = vec_min(vs16(b##n##0), vs16(cnsts0)); \ vs16(b##n##0) = vec_max(vs16(b##n##0), vs16(cnsts2)); \ 
vec_st(vs16(b##n##0), 0, bp); bp = (vector signed short*)block; CTS(0); bp++; CTS(1); bp++; CTS(2); bp++; CTS(3); bp++; CTS(4); bp++; CTS(5); bp++; CTS(6); bp++; CTS(7);#undef CTS /* }}} */ AMBER_STOP;}#if ALTIVEC_TEST_FUNCTION(idct) /* {{{ */#ifdef ALTIVEC_VERIFYvoid idct_altivec_verify(IDCT_PDECL){/* Verification wrapper around idct_altivec: forwards the arguments unchanged, then inspects the result in block and reports out-of-range values through mjpeg_warn. It only warns; it does not modify block or abort. */ int i; idct_altivec(IDCT_ARGS);/* run the transform first so the loop below checks its output */ for (i = 0; i < 64; i++) {/* scans block[0..63]; -256 and 255 are the same limits idct_altivec clamps to (vec_max / vec_min) before storing, so a warning here means the clamp did not hold */ if (block[i] < -256) mjpeg_warn("idct: block[%d]=%d < -256\n", i, block[i]); else if (block[i] > 255) mjpeg_warn("idct: block[%d]=%d > 255\n", i, block[i]); }}#elseALTIVEC_TEST(idct, void, (IDCT_PDECL), IDCT_PFMT, IDCT_ARGS);/* NOTE(review): used when ALTIVEC_VERIFY is not defined; ALTIVEC_TEST is defined outside this view - presumably it generates the generic test wrapper for idct, confirm against its definition */#endif#endif /* }}} *//* vim:set foldmethod=marker foldlevel=0: */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -