📄 swdec_idct.c
字号:
{
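/* If all coefficients x1..x7 of this row are zero, the row's first
coefficient (block[i]) alone defines the 1-D idct: every output of the
row is that coefficient shifted left by 6. This shortcut avoids the
full row computation.
*/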
if (!((x1 = block[i+1]) | (x2 = block[i+2]) | (x3 = block[i+3]) |
(x4 = block[i+4]) | (x5 = block[i+5]) | (x6 = block[i+6]) |
(x7 = block[i+7])))
{
block[i+1]=block[i+2]=block[i+3]=block[i+4]=
block[i+5]=block[i+6]=block[i+7]=
block[i]=block[i] << 6;
}
else /* Full idct for this row */
{
x0 = block[i];
x8 = x1 + x7;
x7 = x1 - x7;
x1 = x8;
x3 = L0 * x3;
x5 = L0 * x5;
x8 = x0 + x4;
x4 = x0 - x4;
x0 = x8;
t = L9 * (x2 + x6); /* Scale-rotate operation sqrt(2)*C6 */
x8 = L7 * x2 + t;
x2 = -L8 * x6 + t;
x6 = x8;
x8 = (x7 << 7) + x5;
x5 = (x7 << 7) - x5;
x7 = x8;
x8 = (x1 << 7) + x3;
x3 = (x1 << 7) - x3;
x1 = x8;
/* 2^11 added for proper rounding at the last stage */
x0 = (x0 << 18) + 2048;
x4 = (x4 << 18) + 2048;
x8 = x0 + x6;
x6 = x0 - x6;
x0 = x8;
x8 = x4 + x2;
x2 = x4 - x2;
x4 = x8;
t = L3 * (x3 + x5); /* Scale-rotate operation C1 */
x8 = -L1 * x3 + t;
x3 = -L2 * x5 + t;
x5 = x8;
t = L6 * (x7 + x1); /* Scale-rotate operation C3 */
x8 = -L4 * x7 + t;
x7 = -L5 * x1 + t;
x1 = x8;
block[i+0] = (x0 + x1) >> 12;
block[i+1] = (x4 + x5) >> 12;
block[i+2] = (x2 + x3) >> 12;
block[i+3] = (x6 + x7) >> 12;
block[i+4] = (x6 - x7) >> 12;
block[i+5] = (x2 - x3) >> 12;
block[i+6] = (x4 - x5) >> 12;
block[i+7] = (x0 - x1) >> 12;
}
} /* End of horizontal transform */
/* Vertical transform */
for (i = 7; i >= 0; i--)
{
/* Two-stage zero check separates three column idct cases:
the last 7 coeffs are zero, only the last 5 coeffs are zero, and
everything else (full column idct).
The more zero coeffs are found, the less computation is needed.
*/
if (!((x3 = block[24+i]) | (x4 = block[32+i]) | (x5 = block[40+i]) |
(x6 = block[48+i]) | (x7 = block[56+i])))
{ /* atleast 5 last coeffs are 0 */
if (!((x1 = block[8+i]) | (x2 = block[16+i])))
{ /* atleast 7 last coeffs are 0 */
x0 = (block[i] + 256) >> 9;
if (x0 > 255) x0 = 255;
if (x0 < -256) x0 = -256;
block[i]=block[8+i]=block[16+i]=block[24+i]=
block[32+i]=block[40+i]=block[48+i]=
block[56+i]=x0;
}
else /* 5 last coeffs (x7, x6, x5, x4 and x3) are 0 */
{
x0 = block[i];
x7 = x1;
x0 += 256; /* 2^8 added for proper rounding */
x4 = x0;
t = L9B * x2; /* Scale-rotate operation sqrt(2)*C6 */
x6 = L7B * x2 + t;
x2 = t;
x7 = x7 << 2;
x5 = x7;
x1 = x1 << 2;
x3 = x1;
x8 = (x0 << 13) + x6;
x6 = (x0 << 13) - x6;
x0 = x8;
x8 = (x4 << 13) + x2;
x2 = (x4 << 13) - x2;
x4 = x8;
t = L3 * (x3 + x5); /* Scale-rotate operation C1 */
x8 = -L1 * x3 + t;
x3 = -L2 * x5 + t;
x5 = x8;
t = L6 * (x7 + x1); /* Scale-rotate operation C3 */
x8 = -L4 * x7 + t;
x7 = -L5 * x1 + t;
x1 = x8;
#ifndef MP4DEC_ARM11
block[i] = (i32)clp[(x0 + x1) >> 22];
block[8+i] = (i32)clp[(x4 + x5) >> 22];
block[16+i] = (i32)clp[(x2 + x3) >> 22];
block[24+i] = (i32)clp[(x6 + x7) >> 22];
block[32+i] = (i32)clp[(x6 - x7) >> 22];
block[40+i] = (i32)clp[(x2 - x3) >> 22];
block[48+i] = (i32)clp[(x4 - x5) >> 22];
block[56+i] = (i32)clp[(x0 - x1) >> 22];
#else
/* scale and write back to buffer
* saturated coefficients */
x8 = (x0 + x1) >> 22;
x1 = (x0 - x1) >> 22;
x0 = (x4 + x5) >> 22;
x4 = (x4 - x5) >> 22;
x5 = (x2 + x3) >> 22;
x2 = (x2 - x3) >> 22;
x3 = (x6 + x7) >> 22;
x6 = (x6 - x7) >> 22;
__asm
{
SSAT x7, #9, x8; /* signed saturation [-256,255] */
MOV x8, i, LSL #2; /* get address */
STR x7, [block, x8];/* store coefficient */
SSAT x7, #9, x0;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x5;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x3;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x6;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x2;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x4;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x1;
ADD x8, x8, #32;
STR x7, [block, x8];
}
#endif
}
}
else /* Full idct for this column */
{
x0 = block[i];
x1 = block[8+i];
x2 = block[16+i];
x8 = x1 + x7;
x7 = x1 - x7;
x1 = x8;
x3 = L0 * x3;
x5 = L0 * x5;
x0 += 256; /* 2^8 added for proper rounding at the last stage */
x8 = x0 + x4;
x4 = x0 - x4;
x0 = x8;
t = L9B * (x2 + x6); /* Scale-rotate operation sqrt(2)*C6 */
x8 = L7B * x2 + t;
x2 = -L8B * x6 + t;
x6 = x8;
x8 = (x7 << 7) + x5;
x5 = ((x7 << 7) - x5) >> 5;
x7 = x8 >> 5;
x8 = (x1 << 7) + x3;
x3 = ((x1 << 7) - x3) >> 5;
x1 = x8 >> 5;
x8 = (x0 << 13) + x6;
x6 = (x0 << 13) - x6;
x0 = x8;
x8 = (x4 << 13) + x2;
x2 = (x4 << 13) - x2;
x4 = x8;
t = L3 * (x3 + x5); /* Scale-rotate operation C1 */
x8 = -L1 * x3 + t;
x3 = -L2 * x5 + t;
x5 = x8;
t = L6 * (x7 + x1); /* Scale-rotate operation C3 */
x8 = -L4 * x7 + t;
x7 = -L5 * x1 + t;
x1 = x8;
#ifndef MP4DEC_ARM11
block[i] = (i32)clp[(x0 + x1) >> 22];
block[8+i] = (i32)clp[(x4 + x5) >> 22];
block[16+i] = (i32)clp[(x2 + x3) >> 22];
block[24+i] = (i32)clp[(x6 + x7) >> 22];
block[32+i] = (i32)clp[(x6 - x7) >> 22];
block[40+i] = (i32)clp[(x2 - x3) >> 22];
block[48+i] = (i32)clp[(x4 - x5) >> 22];
block[56+i] = (i32)clp[(x0 - x1) >> 22];
#else
/* scale and write back to buffer
* saturated coefficients */
x8 = (x0 + x1) >> 22;
x1 = (x0 - x1) >> 22;
x0 = (x4 + x5) >> 22;
x4 = (x4 - x5) >> 22;
x5 = (x2 + x3) >> 22;
x2 = (x2 - x3) >> 22;
x3 = (x6 + x7) >> 22;
x6 = (x6 - x7) >> 22;
__asm
{
SSAT x7, #9, x8; /* signed saturation [-256,255] */
MOV x8, i, LSL #2; /* get address */
STR x7, [block, x8]; /* store coefficient */
SSAT x7, #9, x0;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x5;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x3;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x6;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x2;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x4;
ADD x8, x8, #32;
STR x7, [block, x8];
SSAT x7, #9, x1;
ADD x8, x8, #32;
STR x7, [block, x8];
}
#endif
}
}
return;
}
/*------------------------------------------------------------------------------
5.3 Function name: SwDec_IdctIntra
Purpose: Calculate inverse DCT transform for a block.
Data is a one-dimensional array of size [64].
Input: *pDecContainer Pointer to decoder container
*data Pointer to data before IDCT
mbNum Macroblock number
blockNum Block number (0-3 luminance, 4-5 chrominance)
Note: numRows (number of non-zero rows in data) is not a
parameter; it is read from
pDecContainer->StrmStorage.numIdctRows
Output:
------------------------------------------------------------------------------*/
void SwDec_IdctIntra(decContainer_t *pDecContainer, i32 *data,
u32 mbNum, u32 blockNum)
{
u32 j;
u32 tmp;
u32 width, rowOffset, colOffset;
u8 *pOut;
u32 *pOut32;
ASSERT(data);
ASSERT(pDecContainer);
ASSERT(pDecContainer->StrmStorage.numIdctRows <= 8);
ASSERT(mbNum < pDecContainer->VopDesc.totalMbInVop);
ASSERT(blockNum < 6);
/* luminance */
if (blockNum < 4)
{
width = pDecContainer->VopDesc.vopWidth*16;
rowOffset = pDecContainer->StrmStorage.row[mbNum]*16 +
(blockNum>>1)*8;
colOffset = pDecContainer->StrmStorage.col[mbNum]*16 +
(blockNum&0x1)*8;
pOut = pDecContainer->pOut + rowOffset*width + colOffset;
}
/* chrominance */
else
{
width = pDecContainer->VopDesc.vopWidth*8;
rowOffset = pDecContainer->StrmStorage.row[mbNum]*8;
colOffset = pDecContainer->StrmStorage.col[mbNum]*8;
pOut = pDecContainer->pOut +
(256+(blockNum&0x1)*64)*pDecContainer->VopDesc.vopWidth*
pDecContainer->VopDesc.vopHeight +
rowOffset*width + colOffset;
}
if (!pDecContainer->StrmStorage.numIdctRows)
{
/*lint --e(826) */
pOut32 = (u32*)pOut;
/* only DC coeff,
* DC coeff is always positive for Intra blocks */
/*lint --e(568) */
if ( ((data[0]+4)>>3) < 0 )
tmp = 0;
else if ( ((data[0]+4)>>3) > 255 )
tmp = 255;
else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -