📄 swdec_processblock.c
字号:
*pOut++ = (u8)((tmp1+tmp2+round)>>1);
tmp1 = *pRef++;
tmp2 = *ptmp++;
*pOut++ = (u8)((tmp3+tmp4+round)>>1);
tmp3 = *pRef;
tmp4 = *ptmp;
*pOut++ = (u8)((tmp1+tmp2+round)>>1);
*pOut++ = (u8)((tmp3+tmp4+round)>>1);
pRef += (width-7);
ptmp += (width-7);
}
} /* else -- Not in word boundary */
#else
if (round)
{
__asm
{
MOV i, #8; /* loop counter */
LDR tmp1, [pRef]; /* load four pels (first row) */
MOV c1, 0x01010101;
LDR tmp3, [pRef, #4]; /* load next four pels (first row) */
loop1:
TST i, 0x1; /* odd row? */
LDRNE tmp1, [pRef, width]!; /* even => load to tmp2 and tmp4 */
LDREQ tmp2, [pRef, width]!; /* odd => load to tmp1 and tmp3 */
LDRNE tmp3, [pRef, #4];
LDREQ tmp4, [pRef, #4];
UHADD8 tmpx, tmp1, tmp2; /* pixel wise addition and halving */
EOR tmpy, tmp1, tmp2; /* calculate proper rounding */
AND tmpy, tmpy, c1;
UADD8 tmpx, tmpx, tmpy; /* add correct "roud" */
STR tmpx, [pOut], #4; /* store interpolated 4 pixels */
UHADD8 tmpx, tmp3, tmp4;
EOR tmpy, tmp3, tmp4;
AND tmpy, tmpy, c1;
UADD8 tmpx, tmpx, tmpy;
STR tmpx, [pOut], #4;
SUBS i, i, #1;
BNE loop1;
}
}
else
{
__asm
{
MOV i, #8; /* loop counter */
LDR tmp1, [pRef]; /* load four pels (first row) */
NOP /* no two successive unaligned loads */
LDR tmp3, [pRef, #4]; /* load next four pels (first row) */
loop2:
TST i, 0x1; /* odd row? */
LDRNE tmp1, [pRef, width]!; /* even => load to tmp2 and tmp4 */
LDREQ tmp2, [pRef, width]!; /* odd => load to tmp1 and tmp3 */
LDRNE tmp3, [pRef, #4];
LDREQ tmp4, [pRef, #4];
UHADD8 tmpx, tmp1, tmp2; /* addition and halving (4 pels) */
STR tmpx, [pOut], #4; /* store interpolated four pixels */
UHADD8 tmpx, tmp3, tmp4;
STR tmpx, [pOut], #4;
SUBS i, i, #1;
BNE loop2;
}
}
#endif
}
/*------------------------------------------------------------------------------
5.9 Function name: SwDec_InterpolateHorizontal
Purpose: Horizontal interpolation of an 8x8 block of the reference
picture: each output pel is the average of a reference pel and its
right-hand neighbour, (a + b + round) >> 1
Input:
pRef pointer to reference picture (top-left pel of the block)
pOut pointer to interpolated block (64 pels, written consecutively)
width width of reference picture in pixels (whole macro
blocks)
round rounding control (should be 1-vop_rounding_type)
Output:
pOut receives the interpolated 8x8 block
------------------------------------------------------------------------------*/
void SwDec_InterpolateHorizontal(u8 *pRef, u8 *pOut, u32 width, u32 round)
{
    /* Builds an 8x8 block in pOut where every pel is the average of a
     * reference pel and its right-hand neighbour:
     *     out[x] = (ref[x] + ref[x+1] + round) >> 1
     * Nine reference pels are read on each of eight rows (rows are `width`
     * bytes apart); the 64 results are written to pOut back to back.
     * `round` must be 0 or 1. */
    u32 i;
#if !defined(MP4DEC_ARM11) || !defined(MP4DEC_UNALIGNED)
    u32 col;
    u32 left, right;
#else
    u32 tmp,tmp1,tmp2,tmp3,tmp4,c1;
#endif

    ASSERT(pRef);
    ASSERT(pOut);
    ASSERT(width >= 8);
    ASSERT(round <= 1);

#if !defined(MP4DEC_ARM11) || !defined(MP4DEC_UNALIGNED)
    /* Portable path: slide a (left, right) pair along the row. */
    for (i = 8; i; i--)
    {
        left = *pRef++;
        for (col = 8; col; col--)
        {
            right = *pRef++;
            *pOut++ = (u8)((left + right + round) >> 1);
            left = right;
        }
        /* nine pels were consumed above; step to the start of the next row */
        pRef += (width - 9);
    }
#else
    /* ARM11 path: four pels per instruction. UHADD8 yields (a+b)>>1 per
     * byte; when rounding up is requested, (a^b)&1 (set where the sum is
     * odd) is added on top to obtain (a+b+1)>>1. The first row is handled
     * before the loop, the remaining seven rows inside it. */
    if (round)
    {
        __asm
        {
            LDR tmp1, [pRef]; /* pels 0-3 of the first row */
            NOP /* keep unaligned loads from being back to back */
            LDR tmp3, [pRef, #4]; /* pels 4-7 */
            ADD pRef, pRef, #1; /* step one pel to the right */
            LDR tmp2, [pRef]; /* pels 1-4 */
            MOV c1, 0x01010101; /* per-byte LSB mask used for rounding */
            LDR tmp4, [pRef, #4]; /* pels 5-8 */
            SUB pRef, pRef, #1; /* restore the row start */
            UHADD8 tmp, tmp1, tmp2; /* per-byte (a+b)>>1 */
            EOR tmp1, tmp1, tmp2; /* LSB set where a+b is odd */
            AND tmp1, tmp1, c1;
            UADD8 tmp, tmp, tmp1; /* apply the rounding */
            STR tmp, [pOut], #4; /* write output pels 0-3 */
            UHADD8 tmp, tmp3, tmp4;
            EOR tmp3, tmp3, tmp4;
            AND tmp3, tmp3, c1;
            UADD8 tmp, tmp, tmp3;
            STR tmp, [pOut], #4; /* write output pels 4-7 */
            MOV i, #7; /* seven rows remain */
        loop1:
            LDR tmp1, [pRef, width]!; /* advance to next row, pels 0-3 */
            NOP /* keep unaligned loads from being back to back */
            LDR tmp3, [pRef, #4]; /* pels 4-7 */
            ADD pRef, pRef, #1;
            LDR tmp2, [pRef]; /* pels 1-4 */
            NOP
            LDR tmp4, [pRef, #4]; /* pels 5-8 */
            SUB pRef, pRef, #1; /* restore the row start */
            UHADD8 tmp, tmp1, tmp2; /* per-byte (a+b)>>1 */
            EOR tmp1, tmp1, tmp2; /* LSB set where a+b is odd */
            AND tmp1, tmp1, c1;
            UADD8 tmp, tmp, tmp1; /* apply the rounding */
            STR tmp, [pOut], #4; /* write output pels 0-3 */
            UHADD8 tmp, tmp3, tmp4;
            EOR tmp3, tmp3, tmp4;
            AND tmp3, tmp3, c1;
            UADD8 tmp, tmp, tmp3;
            STR tmp, [pOut], #4; /* write output pels 4-7 */
            SUBS i, i, #1;
            BNE loop1;
        }
    }
    else
    {
        __asm
        {
            LDR tmp1, [pRef]; /* pels 0-3 of the first row */
            NOP /* keep unaligned loads from being back to back */
            LDR tmp3, [pRef, #4]; /* pels 4-7 */
            ADD pRef, pRef, #1;
            LDR tmp2, [pRef]; /* pels 1-4 */
            NOP
            LDR tmp4, [pRef, #4]; /* pels 5-8 */
            SUB pRef, pRef, #1;
            UHADD8 tmp1, tmp1, tmp2; /* per-byte (a+b)>>1, no rounding */
            UHADD8 tmp3, tmp3, tmp4;
            STR tmp1, [pOut], #4; /* write output pels 0-3 */
            STR tmp3, [pOut], #4; /* write output pels 4-7 */
            MOV i, #7; /* seven rows remain */
        loop2:
            LDR tmp1, [pRef, width]!; /* advance to next row, pels 0-3 */
            NOP
            LDR tmp3, [pRef, #4];
            ADD pRef, pRef, #1;
            LDR tmp2, [pRef];
            NOP
            LDR tmp4, [pRef, #4];
            SUB pRef, pRef, #1;
            UHADD8 tmp1, tmp1, tmp2;
            UHADD8 tmp3, tmp3, tmp4;
            STR tmp1, [pOut], #4;
            STR tmp3, [pOut], #4;
            SUBS i, i, #1;
            BNE loop2;
        }
    }
#endif
}
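/*------------------------------------------------------------------------------
5.10 Function name: SwDec_InterpolateBoth
Purpose: Horizontal and vertical interpolation of an 8x8 block of the
reference picture: each output pel is the rounded average of a 2x2
neighbourhood of reference pels, (a + b + c + d + round + 1) >> 2
Input:
pRef pointer to reference picture (top-left pel of the block)
pOut pointer to interpolated block (64 pels, written consecutively)
width width of reference picture in pixels (whole macro
blocks)
round rounding control (should be 1-vop_rounding_type)
Output:
pOut receives the interpolated 8x8 block
------------------------------------------------------------------------------*/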
void SwDec_InterpolateBoth(u8 *pRef, u8 *pOut, u32 width, u32 round)
{
    /* Builds an 8x8 block in pOut where every pel is the rounded average of
     * a 2x2 neighbourhood of reference pels:
     *     out[x] = (ref[x] + ref[x+1] + below[x] + below[x+1] + round + 1) >> 2
     * `below` is the row `width` bytes after `ref`. Nine pels are read from
     * each of the two rows for every output row; the 64 results are written
     * to pOut back to back. `round` must be 0 or 1 on entry. */
    u32 i;
    u8 *ptmp;
#if !defined(MP4DEC_ARM11) || !defined(MP4DEC_UNALIGNED)
    u32 col;
    u32 rowSkip;
    u32 sumLeft, sumRight;
#else
    u32 tmp1,tmp2,tmp3,tmp4,tmp5,tmp6;
#endif

    ASSERT(pRef);
    ASSERT(pOut);
    ASSERT(width >= 8);
    ASSERT(round <= 1);

    /* ptmp tracks the row directly below pRef */
    ptmp = pRef + width;
    /* from here on `round` is the bias added before the divide by four */
    round += 1;

#if !defined(MP4DEC_ARM11) || !defined(MP4DEC_UNALIGNED)
    /* Portable path: keep the vertical sum of the previous column so that
     * each reference pel is fetched only once per output row. */
    rowSkip = (width - 8);
    for (i = 8; i; i--)
    {
        sumLeft = (u32)*pRef + (u32)*ptmp;
        for (col = 8; col; col--)
        {
            pRef++;
            ptmp++;
            sumRight = (u32)*pRef + (u32)*ptmp;
            *pOut++ = (u8)((sumLeft + sumRight + round) >> 2);
            sumLeft = sumRight;
        }
        /* both pointers advanced eight pels; move on to the next row */
        pRef += rowSkip;
        ptmp += rowSkip;
    }
#else
    /* ARM11 path: each word load brings in four pels. The even bytes (0, 2)
     * and the odd bytes (1, 3, reached through ROR #8) are accumulated
     * separately as two 16-bit sums per register, biased, divided by four,
     * masked back to bytes and finally interleaved into one output word.
     * The two branches differ only in the bias constant. */
    if (round == 1)
    {
        __asm
        {
            MOV i, #8; /* row counter */
        loop1:
            LDR tmp1, [pRef]; /* upper row, pels 0-3 */
            LDR tmp2, [ptmp]; /* lower row, pels 0-3 */
            ADD pRef, pRef, #1; /* step both rows one pel right */
            ADD ptmp, ptmp, #1;
            LDR tmp3, [pRef]; /* upper row, pels 1-4 */
            LDR tmp4, [ptmp]; /* lower row, pels 1-4 */
            UUNPK8TO16 tmp5, tmp1; /* bytes 0 and 2 as two halfwords */
            UADD8TO16 tmp5, tmp5, tmp2; /* accumulate matching bytes */
            UADD8TO16 tmp5, tmp5, tmp3;
            UADD8TO16 tmp5, tmp5, tmp4;
            UADD16 tmp5, tmp5, 0x00010001; /* bias of 1 per halfword */
            AND tmp5, 0x00FF00FF, tmp5, LSR #2; /* divide by 4, mask */
            UUNPK8TO16 tmp6, tmp1, ROR #8; /* bytes 1 and 3 */
            UADD8TO16 tmp6, tmp6, tmp2, ROR #8;
            UADD8TO16 tmp6, tmp6, tmp3, ROR #8;
            UADD8TO16 tmp6, tmp6, tmp4, ROR #8;
            UADD16 tmp6, tmp6, 0x00010001; /* bias of 1 per halfword */
            AND tmp6, 0x00FF00FF, tmp6, LSR #2; /* divide by 4, mask */
            ORR tmp5, tmp5, tmp6, LSL #8; /* interleave even/odd pels */
            STR tmp5, [pOut], #4; /* write output pels 0-3 */
            /* same sequence for output pels 4-7 */
            ADD pRef, pRef, #3; /* pointers now at pel 4 */
            ADD ptmp, ptmp, #3;
            LDR tmp1, [pRef];
            LDR tmp3, [ptmp];
            ADD pRef, pRef, #1;
            ADD ptmp, ptmp, #1;
            LDR tmp2, [pRef];
            LDR tmp4, [ptmp];
            UUNPK8TO16 tmp5, tmp1;
            UADD8TO16 tmp5, tmp5, tmp2;
            UADD8TO16 tmp5, tmp5, tmp3;
            UADD8TO16 tmp5, tmp5, tmp4;
            UADD16 tmp5, tmp5, 0x00010001;
            AND tmp5, 0x00FF00FF, tmp5, LSR #2;
            UUNPK8TO16 tmp6, tmp1, ROR #8;
            UADD8TO16 tmp6, tmp6, tmp2, ROR #8;
            UADD8TO16 tmp6, tmp6, tmp3, ROR #8;
            UADD8TO16 tmp6, tmp6, tmp4, ROR #8;
            UADD16 tmp6, tmp6, 0x00010001;
            AND tmp6, 0x00FF00FF, tmp6, LSR #2;
            ORR tmp5, tmp5, tmp6, LSL #8;
            STR tmp5, [pOut], #4; /* write output pels 4-7 */
            SUB pRef, pRef, #5; /* back to pel 0 of this row */
            SUB ptmp, ptmp, #5;
            ADD pRef, pRef, width; /* down one row */
            ADD ptmp, ptmp, width;
            SUBS i, i, #1;
            BNE loop1;
        }
    }
    else
    {
        __asm
        {
            MOV i, #8; /* row counter */
        loop2:
            LDR tmp1, [pRef]; /* upper row, pels 0-3 */
            LDR tmp3, [ptmp]; /* lower row, pels 0-3 */
            ADD pRef, pRef, #1;
            ADD ptmp, ptmp, #1;
            LDR tmp2, [pRef]; /* upper row, pels 1-4 */
            LDR tmp4, [ptmp]; /* lower row, pels 1-4 */
            UUNPK8TO16 tmp5, tmp1;
            UADD8TO16 tmp5, tmp5, tmp2;
            UADD8TO16 tmp5, tmp5, tmp3;
            UADD8TO16 tmp5, tmp5, tmp4;
            UADD16 tmp5, tmp5, 0x00020002; /* bias of 2 per halfword */
            AND tmp5, 0x00FF00FF, tmp5, LSR #2;
            UUNPK8TO16 tmp6, tmp1, ROR #8;
            UADD8TO16 tmp6, tmp6, tmp2, ROR #8;
            UADD8TO16 tmp6, tmp6, tmp3, ROR #8;
            UADD8TO16 tmp6, tmp6, tmp4, ROR #8;
            UADD16 tmp6, tmp6, 0x00020002; /* bias of 2 per halfword */
            AND tmp6, 0x00FF00FF, tmp6, LSR #2;
            ORR tmp5, tmp5, tmp6, LSL #8;
            STR tmp5, [pOut], #4; /* write output pels 0-3 */
            ADD pRef, pRef, #3; /* pointers now at pel 4 */
            ADD ptmp, ptmp, #3;
            LDR tmp1, [pRef];
            LDR tmp3, [ptmp];
            ADD pRef, pRef, #1;
            ADD ptmp, ptmp, #1;
            LDR tmp2, [pRef];
            LDR tmp4, [ptmp];
            UUNPK8TO16 tmp5, tmp1;
            UADD8TO16 tmp5, tmp5, tmp2;
            UADD8TO16 tmp5, tmp5, tmp3;
            UADD8TO16 tmp5, tmp5, tmp4;
            UADD16 tmp5, tmp5, 0x00020002;
            AND tmp5, 0x00FF00FF, tmp5, LSR #2;
            UUNPK8TO16 tmp6, tmp1, ROR #8;
            UADD8TO16 tmp6, tmp6, tmp2, ROR #8;
            UADD8TO16 tmp6, tmp6, tmp3, ROR #8;
            UADD8TO16 tmp6, tmp6, tmp4, ROR #8;
            UADD16 tmp6, tmp6, 0x00020002;
            AND tmp6, 0x00FF00FF, tmp6, LSR #2;
            ORR tmp5, tmp5, tmp6, LSL #8;
            STR tmp5, [pOut], #4; /* write output pels 4-7 */
            SUB pRef, pRef, #5; /* back to pel 0 of this row */
            SUB ptmp, ptmp, #5;
            ADD pRef, pRef, width; /* down one row */
            ADD ptmp, ptmp, width;
            SUBS i, i, #1;
            BNE loop2;
        }
    }
#endif
}
/*------------------------------------------------------------------------------
5.11 Function name: SwDec_AcDcPrediction
Purpose: Perform decoding of DC and AC prediction. Also perform
inverse quantization of the DC coefficient and of the first row and
first column of the block.
Input:
pDecContainer pointer to decContainer_t
pData pointer to block data before prediction
mbNum macro block number
blockNum block number
scanDir SCAN_ZIGZAG, SCAN_HOR or SCAN_VER
Output:
------------------------------------------------------------------------------*/
#ifndef MP4DEC_H263_ONLY
void SwDec_AcDcPrediction(decContainer_t* pDecContainer, i32 *pData,
u32 mbNum, u32 blockNum, u32 scanDir)
{
u32 i;
i32 *p1,*p2;
u32 QP;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -