📄 swdec_idct.c

📁 freescale i.mx31 BSP CE5.0全部源码
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
            tmp = (data[0]+4)>>3;
        
        tmp |= tmp<<8;
        tmp |= tmp<<16;
        tmp |= tmp<<24;
        
        for (j = 4; j; j--)
        {
            pOut32[0] = tmp;
            pOut32[1] = tmp;
            pOut32 += width>>2;
            pOut32[0] = tmp;
            pOut32[1] = tmp;
            pOut32 += width>>2;
        }
    } else if (pDecContainer->StrmStorage.numIdctRows == 1) {
        SwDec_IdctIntraBlock(data, 1, pOut, width);     /* 1-row IDCT */
    } else if (pDecContainer->StrmStorage.numIdctRows <= 3) {
        SwDec_IdctIntraBlock(data, 3, pOut, width);     /* 3-row IDCT */
    } else if (pDecContainer->StrmStorage.numIdctRows <= 5) {
        SwDec_IdctIntraBlock(data, 5, pOut, width);     /* 5-row IDCT */
    } else {
        SwDec_IdctIntraBlock(data, 8, pOut, width);     /* full 8-row IDCT */
    }
    return;
}
/*------------------------------------------------------------------------------

   5.4  Function name: SwDec_IdctIntraBlock

        Functional description:
          Computes the 2-D inverse DCT of one 8x8 block using Loeffler's
          fast algorithm, in fixed-point integer arithmetic.
          A horizontal (row) pass is run in place on the first 'rows' rows
          of the block, followed by a vertical (column) pass over all eight
          columns. Both passes contain shortcuts for rows/columns whose
          higher coefficients are all zero.
          Because this is the Intra variant, the results of the vertical
          pass are written directly to the output picture as 8-bit pixels
          instead of being stored back into the block.

        Inputs:
          i32 *block             Block data before idct, length 64, stored
                                 row by row (row r starts at block[8*r]).
                                 Overwritten by the horizontal pass.
          u32 rows               Number of leading rows given to the
                                 horizontal pass. The vertical pass still
                                 reads all 8 rows, so rows at index >= 'rows'
                                 are presumably expected to be zero
                                 (NOTE(review): confirm against the caller).
          u8 *pOut               Pointer to the top-left pixel of the 8x8
                                 destination area in the output picture
          u32 width              Width of picture in pels; used as the byte
                                 offset between consecutive output rows

        Outputs:
          pOut[r*width + c]      for r, c in 0..7: the reconstructed pixels
          block                  holds the horizontally transformed rows
                                 (intermediate data) on return

        Notes:
          L0..L9 and L7B..L9B are fixed-point multiplier constants and
          clipIntra is a clipping table; all are defined outside this
          function (not visible in this part of the file).

------------------------------------------------------------------------------*/
void SwDec_IdctIntraBlock(i32 *block, u32 rows, u8 *pOut, u32 width)
{

/* Variables */
    /* x0..x7 hold the eight samples of the row/column being processed and
       are reused in place from one butterfly stage to the next, so the
       order of the statements below matters. x8 and t are scratch. */
    i32 i, t, x0, x1, x2, x3, x4, x5, x6, x7, x8;
#ifndef MP4DEC_ARM11
    /* Clipping table pointer offset by 512 entries; presumably this lets
       negative results be used directly as indices
       (NOTE(review): confirm the size and contents of clipIntra). */
    const u8* const clp = clipIntra+512;
#endif

/* Code */

    /* Horizontal transform */
    /* i is the index of the first coefficient of the current row; rows are
       processed from row (rows-1) down to row 0. When rows == 0 the start
       value is -8 and the loop body is never executed. */
    for (i = (i32)(rows<<3)-8; i >=0; i-=8)
    {
        /*  If all coefficients x1..x7 == 0, the first coefficient x0
            alone defines the idct of this row. This 1-D shortcut avoids
            the full computation.
            The assignments inside the condition also load x1..x7 for the
            full-idct branch below.
        */
        if (!((x1 = block[i+1]) | (x2 = block[i+2]) | (x3 = block[i+3]) | 
              (x4 = block[i+4]) | (x5 = block[i+5]) | (x6 = block[i+6]) | 
              (x7 = block[i+7])))
        {
            /* All eight outputs equal x0 * 2^6; this is the same net gain
               as the full path below (<< 18 followed by >> 12). */
            block[i+1]=block[i+2]=block[i+3]=block[i+4]=
                block[i+5]=block[i+6]=block[i+7]=
                block[i]=block[i] << 6;
        }
        else    /* Full idct for this row */
        {
            x0 = block[i]; 

            /* Odd part, first butterfly: sum/difference of x1 and x7,
               x3 and x5 scaled by L0 */
            x8 = x1 + x7;
            x7 = x1 - x7;
            x1 = x8;
            x3 = L0 * x3;
            x5 = L0 * x5;

            /* Even part, first butterfly: sum/difference of x0 and x4 */
            x8 = x0 + x4;
            x4 = x0 - x4;
            x0 = x8;

            t = L9 * (x2 + x6);       /* Scale-rotate operation sqrt(2)*C6 */
            x8 = L7 * x2 + t;
            x2 = -L8 * x6 + t;
            x6 = x8;

            /* Odd part, second butterfly; x7 and x1 are raised by 7 bits
               before being combined with the L0-scaled x5 and x3 */
            x8 = (x7 << 7) + x5;
            x5 = (x7 << 7) - x5;
            x7 = x8;
            x8 = (x1 << 7) + x3;
            x3 = (x1 << 7) - x3;
            x1 = x8;

            /* 2^11 added for proper rounding at the last stage
               (the final shift is >> 12) */
            x0 = (x0 << 18) + 2048;
            x4 = (x4 << 18) + 2048; 

            /* Even part, second butterfly */
            x8 = x0 + x6;
            x6 = x0 - x6;
            x0 = x8;
            x8 = x4 + x2;
            x2 = x4 - x2;
            x4 = x8;

            t = L3 * (x3 + x5);       /* Scale-rotate operation C1 */
            x8 = -L1 * x3 + t;
            x3 = -L2 * x5 + t;
            x5 = x8;

            t = L6 * (x7 + x1);       /* Scale-rotate operation C3 */
            x8 = -L4 * x7 + t;
            x7 = -L5 * x1 + t;
            x1 = x8;

            /* Final butterfly: outputs 0..3 are the sums, outputs 4..7 the
               matching differences in mirrored order. Results are written
               back into the block for the vertical pass. */
            block[i+0] = (x0 + x1) >> 12;
            block[i+1] = (x4 + x5) >> 12;
            block[i+2] = (x2 + x3) >> 12;
            block[i+3] = (x6 + x7) >> 12;
            block[i+4] = (x6 - x7) >> 12;
            block[i+5] = (x2 - x3) >> 12;
            block[i+6] = (x4 - x5) >> 12;
            block[i+7] = (x0 - x1) >> 12;
        }

    }   /* End of horizontal transform */


    /* Vertical transform */
    /* i is the column index; columns are processed from 7 down to 0 and
       each column produces the eight pixels pOut[r*width + i], r = 0..7. */
    for (i = 7; i >= 0; i--)
    {
        /* 2-stage zero-1D shortcut check separates three column idct cases:
           7 zero coeffs, 5 zero coeffs and less than 5 zero coeffs.
           The more zero coeffs are found the less computation is needed.
           The assignments inside the conditions load x3..x7 (and, in the
           second test, x1 and x2) for the branches that follow.
        */
        if (!((x3 = block[24+i]) | (x4 = block[32+i]) | (x5 = block[40+i]) |
              (x6 = block[48+i]) | (x7 = block[56+i])))       
        {                               /* at least 5 last coeffs are 0 */

            if (!((x1 = block[8+i]) | (x2 = block[16+i])))    
            {                           /* at least 7 last coeffs are 0 */

                /* Only the first coefficient is non-zero: the whole column
                   gets one value. 2^8 is added for rounding before the
                   9-bit down-shift. */
                x0 = (block[i] + 256) >> 9; 

                /* Clamp to the 8-bit pixel range explicitly (no table or
                   SIMD saturation is used in this branch) */
                if (x0 > 255) x0 = 255;
                if (x0 < 0) x0 = 0;
                
                /*lint -e737*/
                pOut[i]=pOut[width+i]=pOut[2*width+i]=pOut[3*width+i]=
                    pOut[4*width+i]=pOut[5*width+i]=pOut[6*width+i]=
                    pOut[7*width+i]=(u8)x0;
                /*lint +e737*/
            }
            else    /*  5 last coeffs (x7, x6, x5, x4 and x3) are 0 */
            {
                /* Reduced form of the full column idct below with
                   x3..x7 == 0 substituted; x1 and x2 were loaded by the
                   test above. */
                x0 = block[i]; 

                x7 = x1;

                x0 += 256;          /* 2^8 added for proper rounding */

                x4 = x0;

                t = L9B * x2;       /* Scale-rotate operation sqrt(2)*C6 */
                x6 = L7B * x2 + t;
                x2 = t;

                /* Equivalent of (x << 7) >> 5 in the full path */
                x7 = x7 << 2;
                x5 = x7;
                x1 = x1 << 2;
                x3 = x1;

                x8 = (x0 << 13) + x6;
                x6 = (x0 << 13) - x6;
                x0 = x8;
                x8 = (x4 << 13) + x2;
                x2 = (x4 << 13) - x2;
                x4 = x8;

                t = L3 * (x3 + x5);       /* Scale-rotate operation C1 */
                x8 = -L1 * x3 + t;
                x3 = -L2 * x5 + t;
                x5 = x8;

                t = L6 * (x7 + x1);       /* Scale-rotate operation C3 */
                x8 = -L4 * x7 + t;
                x7 = -L5 * x1 + t;
                x1 = x8;
#ifndef MP4DEC_ARM11
                /* Scale down by 22 bits and map each result through the
                   clp table before storing it as a pixel. Rows 0..3 take
                   the sums, rows 4..7 the differences in mirrored order. */
                /*lint -e737*/
                pOut[i]         = (u8)clp[(x0 + x1) >> 22];
                pOut[width+i]   = (u8)clp[(x4 + x5) >> 22];
                pOut[2*width+i] = (u8)clp[(x2 + x3) >> 22];
                pOut[3*width+i] = (u8)clp[(x6 + x7) >> 22];
                pOut[4*width+i] = (u8)clp[(x6 - x7) >> 22];
                pOut[5*width+i] = (u8)clp[(x2 - x3) >> 22];
                pOut[6*width+i] = (u8)clp[(x4 - x5) >> 22];
                pOut[7*width+i] = (u8)clp[(x0 - x1) >> 22];
                /*lint +e737*/
#else
                /* Scale and write saturated coefficients
                 * to the output picture. Saturation is done for
                 * two pixels at a time by using ARM11 SIMD
                 * instructions.
                 * After the shifts below the variables hold:
                 *   x8 = row 0, x0 = row 1, x5 = row 2, x3 = row 3,
                 *   x6 = row 4, x2 = row 5, x4 = row 6, x1 = row 7.
                 * Inside the assembly block x7 and x8 carry the packed
                 * pixel pairs and x0 is reused as the running byte offset
                 * (starts at i, advanced by width for every row). */
                x8 = (x0 + x1) >> 22;
                x1 = (x0 - x1) >> 22;
                x0 = (x4 + x5) >> 22;
                x4 = (x4 - x5) >> 22;
                x5 = (x2 + x3) >> 22;
                x2 = (x2 - x3) >> 22;
                x3 = (x6 + x7) >> 22;
                x6 = (x6 - x7) >> 22;
                
                __asm
                {
                    PKHBT   x7, x8, x0, LSL #16; /* pack two coefficients */
                    PKHBT   x8, x5, x3, LSL #16; 
                    USAT16  x7, #8, x7;     /* saturate two coefficients.. */
                    USAT16  x8, #8, x8;     /* at a time to range [0,255]  */
                    
                    MOV     x0, i;              /* get address */
                    STRB    x7, [pOut, x0];     /* store coefficient */
                    ADD     x0, x0, width;      /* update address */
                    MOV     x7, x7, ASR #16;    /* get upper 16 bits */
                    STRB    x7, [pOut, x0];     /* store coefficient */
                    ADD     x0, x0, width;      /* and so on ... */
                    STRB    x8, [pOut, x0];
                    ADD     x0, x0, width;
                    MOV     x8, x8, ASR #16;
                    STRB    x8, [pOut, x0];

                    PKHBT   x7, x6, x2, LSL #16;
                    PKHBT   x8, x4, x1, LSL #16;
                    USAT16  x7, #8, x7;
                    USAT16  x8, #8, x8;
                    
                    ADD     x0, x0, width;
                    STRB    x7, [pOut, x0];
                    ADD     x0, x0, width;
                    MOV     x7, x7, ASR #16;
                    STRB    x7, [pOut, x0];
                    ADD     x0, x0, width;
                    STRB    x8, [pOut, x0];
                    ADD     x0, x0, width;
                    MOV     x8, x8, ASR #16;
                    STRB    x8, [pOut, x0];
                }
#endif
            }
        }
        else    /* Full idct for this column */ 
        {
            /* x3..x7 were already loaded by the zero test above; only
               x0..x2 still have to be fetched. */
            x0 = block[i]; 
            x1 = block[8+i]; 
            x2 = block[16+i];

            /* Odd part, first butterfly */
            x8 = x1 + x7;
            x7 = x1 - x7;
            x1 = x8;
            x3 = L0 * x3;
            x5 = L0 * x5;

            x0 += 256;  /* 2^8 added for proper rounding at the last stage */

            /* Even part, first butterfly */
            x8 = x0 + x4;
            x4 = x0 - x4;
            x0 = x8;


            t = L9B * (x2 + x6);      /* Scale-rotate operation sqrt(2)*C6 */
            x8 = L7B * x2 + t;
            x2 = -L8B * x6 + t;
            x6 = x8;

            /* Odd part, second butterfly. Unlike the horizontal pass the
               results are shifted down by 5 bits here before they enter
               the C1/C3 rotations. */
            x8 = (x7 << 7) + x5;
            x5 = ((x7 << 7) - x5) >> 5;
            x7 = x8 >> 5;
            x8 = (x1 << 7) + x3;
            x3 = ((x1 << 7) - x3) >> 5;
            x1 = x8 >> 5;

            /* Even part, second butterfly */
            x8 = (x0 << 13) + x6;
            x6 = (x0 << 13) - x6;
            x0 = x8;
            x8 = (x4 << 13) + x2;
            x2 = (x4 << 13) - x2;
            x4 = x8;

            t = L3 * (x3 + x5);       /* Scale-rotate operation C1 */
            x8 = -L1 * x3 + t;
            x3 = -L2 * x5 + t;
            x5 = x8;

            t = L6 * (x7 + x1);       /* Scale-rotate operation C3 */
            x8 = -L4 * x7 + t;
            x7 = -L5 * x1 + t;
            x1 = x8;
#ifndef MP4DEC_ARM11
            /* Scale down by 22 bits and map each result through the clp
               table before storing it as a pixel. Rows 0..3 take the sums,
               rows 4..7 the differences in mirrored order. */
            /*lint -e737*/
            pOut[i]         = (u8)clp[(x0 + x1) >> 22];
            pOut[width+i]   = (u8)clp[(x4 + x5) >> 22];
            pOut[2*width+i] = (u8)clp[(x2 + x3) >> 22];
            pOut[3*width+i] = (u8)clp[(x6 + x7) >> 22];
            pOut[4*width+i] = (u8)clp[(x6 - x7) >> 22];
            pOut[5*width+i] = (u8)clp[(x2 - x3) >> 22];
            pOut[6*width+i] = (u8)clp[(x4 - x5) >> 22];
            pOut[7*width+i] = (u8)clp[(x0 - x1) >> 22];
            /*lint +e737*/
#else
            /* Scale and write saturated coefficients
             * to the output picture. Saturation is done for
             * two pixels at a time by using ARM11 SIMD
             * instructions.
             * After the shifts below the variables hold:
             *   x8 = row 0, x0 = row 1, x5 = row 2, x3 = row 3,
             *   x6 = row 4, x2 = row 5, x4 = row 6, x1 = row 7.
             * Inside the assembly block x7 and x8 carry the packed pixel
             * pairs and x0 is reused as the running byte offset (starts
             * at i, advanced by width for every row). */
            x8 = (x0 + x1) >> 22;
            x1 = (x0 - x1) >> 22;
            x0 = (x4 + x5) >> 22;
            x4 = (x4 - x5) >> 22;
            x5 = (x2 + x3) >> 22;
            x2 = (x2 - x3) >> 22;
            x3 = (x6 + x7) >> 22;
            x6 = (x6 - x7) >> 22;
            
            __asm
            {
                PKHBT   x7, x8, x0, LSL #16; /* pack two coefficients */
                PKHBT   x8, x5, x3, LSL #16;
                USAT16  x7, #8, x7;     /* saturate two coefficients.. */
                USAT16  x8, #8, x8;     /* at a time to range [0,255]  */
                
                MOV     x0, i;              /* get address */
                STRB    x7, [pOut, x0];     /* store coefficient */
                ADD     x0, x0, width;      /* update address */
                MOV     x7, x7, ASR #16;    /* get upper 16 bits */
                STRB    x7, [pOut, x0];     /* store coefficient */
                ADD     x0, x0, width;      /* and so on ... */
                STRB    x8, [pOut, x0];
                ADD     x0, x0, width;
                MOV     x8, x8, ASR #16;
                STRB    x8, [pOut, x0];

                PKHBT   x7, x6, x2, LSL #16;
                PKHBT   x8, x4, x1, LSL #16;
                USAT16  x7, #8, x7;
                USAT16  x8, #8, x8;
                
                ADD     x0, x0, width;
                STRB    x7, [pOut, x0];
                ADD     x0, x0, width;
                MOV     x7, x7, ASR #16;
                STRB    x7, [pOut, x0];
                ADD     x0, x0, width;
                STRB    x8, [pOut, x0];
                ADD     x0, x0, width;
                MOV     x8, x8, ASR #16;
                STRB    x8, [pOut, x0];
            }
#endif
        }
    }
    return;
}

/*lint +e701 +e702 */
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -