📄 img_idct_8x8.h
字号:
/* x7t = x7 >> trunc1; */
/* */
/* // ---------------------------------------------------- // */
/* // Store the results transposed. // */
/* // ---------------------------------------------------- // */
/* o_ptr[ 0] = x0t; */
/* o_ptr[ 8] = x1t; */
/* o_ptr[16] = x2t; */
/* o_ptr[24] = x3t; */
/* o_ptr[32] = x4t; */
/* o_ptr[40] = x5t; */
/* o_ptr[48] = x6t; */
/* o_ptr[56] = x7t; */
/* */
/* o_ptr--; // decrement ptr to next column // */
/* } */
/* */
/* // -------------------------------------------------------- // */
/* // Update output pointer to point to next block. // */
/* // -------------------------------------------------------- // */
/* */
/* o_ptr = o_ptr + 8 - 64; */
/* } */
/* */
/* // ---------------------------------------------------------- // */
/* // Reset our pointers for the vertical pass. // */
/* // ---------------------------------------------------------- // */
/* i_ptr = idct_data + 64; */
/* o_ptr = idct_data; */
/* */
/* for (j = 0; j < num_dcts; j++) */
/* { */
/* // -------------------------------------------------------- // */
/* // Perform Vertical 1-D IDCT on each 8x8 block. Store // */
/* // out the results transposed. // */
/* // -------------------------------------------------------- // */
/* for (i = 0; i < 8; i++) */
/* { */
/* // ---------------------------------------------------- // */
/* // Load the freq-domain coefficients. // */
/* // ---------------------------------------------------- // */
/* X0 = i_ptr[0]; */
/* X1 = i_ptr[1]; */
/* X2 = i_ptr[2]; */
/* X3 = i_ptr[3]; */
/* X4 = i_ptr[4]; */
/* X5 = i_ptr[5]; */
/* X6 = i_ptr[6]; */
/* X7 = i_ptr[7]; */
/* i_ptr += 8; // increment ptr to next row // */
/* */
/* // ---------------------------------------------------- // */
/* // Even part of decomp. Add rounding term to DC. // */
/* // ---------------------------------------------------- // */
/* P0 = (((int)X0) << c4_shift) + round2; // c4 is a shift // */
/* P1 = (((int)X4) << c4_shift); // c4 is a shift // */
/* */
/* p0 = P0 + P1; */
/* p1 = P0 - P1; */
/* */
/* r1 = X2*c6 - X6*c2; */
/* r0 = X2*c2 + X6*c6; */
/* */
/* g0 = p0 + r0; */
/* g1 = p1 + r1; */
/* h1 = p1 - r1; */
/* h0 = p0 - r0; */
/* */
/* // ---------------------------------------------------- // */
/* // Odd part of decomp. // */
/* // ---------------------------------------------------- // */
/* g2 = (X1*c7 - X3*c5) + (X5*c3 - X7*c1); */
/* g3 = (X1*c5 - X3*c1) + (X5*c7 + X7*c3); */
/* h3 = (X1*c3 - X3*c7) - (X5*c1 + X7*c5); */
/* h2 = (X1*c1 + X3*c3) + (X5*c5 + X7*c7); */
/* */
/* // ---------------------------------------------------- // */
/* // Final butterfly. // */
/* // ---------------------------------------------------- // */
/* x0 = g0 + h2; */
/* x1 = g1 + h3; */
/* x2 = h1 + g3; */
/* x3 = h0 + g2; */
/* x4 = h0 - g2; */
/* x5 = h1 - g3; */
/* x6 = g1 - h3; */
/* x7 = g0 - h2; */
/* */
/* // ---------------------------------------------------- // */
/* // Truncate, then saturate results to the range [-256, 255]. // */
/* // ---------------------------------------------------- // */
/* x0t = x0 >> trunc2; */
/* x1t = x1 >> trunc2; */
/* x2t = x2 >> trunc2; */
/* x3t = x3 >> trunc2; */
/* x4t = x4 >> trunc2; */
/* x5t = x5 >> trunc2; */
/* x6t = x6 >> trunc2; */
/* x7t = x7 >> trunc2; */
/* */
/* x0s = x0t < -256 ? -256 : x0t > 255 ? 255 : x0t; */
/* x1s = x1t < -256 ? -256 : x1t > 255 ? 255 : x1t; */
/* x2s = x2t < -256 ? -256 : x2t > 255 ? 255 : x2t; */
/* x3s = x3t < -256 ? -256 : x3t > 255 ? 255 : x3t; */
/* x4s = x4t < -256 ? -256 : x4t > 255 ? 255 : x4t; */
/* x5s = x5t < -256 ? -256 : x5t > 255 ? 255 : x5t; */
/* x6s = x6t < -256 ? -256 : x6t > 255 ? 255 : x6t; */
/* x7s = x7t < -256 ? -256 : x7t > 255 ? 255 : x7t; */
/* */
/* // ---------------------------------------------------- // */
/* // Store the results transposed in the result area. // */
/* // ---------------------------------------------------- // */
/* o_ptr[ 0] = x0s; */
/* o_ptr[ 8] = x1s; */
/* o_ptr[16] = x2s; */
/* o_ptr[24] = x3s; */
/* o_ptr[32] = x4s; */
/* o_ptr[40] = x5s; */
/* o_ptr[48] = x6s; */
/* o_ptr[56] = x7s; */
/* */
/* o_ptr++; // increment ptr to next column // */
/* } */
/* // -------------------------------------------------------- // */
/* // Update output pointer to point to next block. // */
/* // -------------------------------------------------------- // */
/* o_ptr = o_ptr - 8 + 64; */
/* } */
/* } */
/* */
/* */
/* Note: This code guarantees correct operation, even in the case */
/* that 'num_idcts == 0' (presumably the block count that the C code */
/* above calls 'num_dcts'). In that case, the function runs for only */
/* 35 cycles (counting 6 cycles of function-call overhead), due to */
/* early-exit code. The early-exit case performs no accesses to the */
/* idct_data[] array. */
/* */
/* TECHNIQUES */
/* All levels of looping are collapsed into single loops which are */
/* pipelined. The outer loop focuses on 8-pt IDCTs, whereas the */
/* inner loop controls the column-pointer to handle jumps between */
/* IDCT blocks. */
/* */
/* For performance, portions of the code outside the loops have been */
/* inter-scheduled with the prolog and epilog code of the loops. */
/* Also, twin stack-pointers are used to accelerate stack accesses. */
/* Finally, pointer values and cosine term registers are reused */
/* between the horizontal and vertical loops to avoid the need for */
/* messy pointer and constant reinitialization. */
/* */
/* To save code size, prolog and epilog collapsing has been performed */
/* to the extent that it does not impact performance. Also, code */
/* outside the loops has been scheduled to pack as tightly into */
/* fetch packets as possible to avoid alignment padding NOPs. */
/* */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -