⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 idct_8x8.asm

📁 davinci技术 源码 视频监控汇编源码
💻 ASM
📖 第 1 页 / 共 3 页
字号:
*       All levels of looping are collapsed into single loops which are     *
*       pipelined.  The outer loop focuses on 8-pt IDCTs, whereas the       *
*       inner loop controls the column-pointer to handle jumps between      *
*       IDCT blocks.  (The column-pointer adjustment is handled by a        *
*       four-phase rotating "fixup" constant which takes the place of       *
*       the original inner-loop.)                                           *
*                                                                           *
*       For performance, portions of the outer-loop code have been          *
*       inter-scheduled with the prologs and epilogs of both loops.         *
*       Finally, cosine term registers are reused between the horizontal    *
*       and vertical loops to save the need for reinitialization.           *
*                                                                           *
*                                                                           *
*     ASSUMPTIONS                                                           *
*       This is a LITTLE ENDIAN implementation.                             *
*                                                                           *
*       The input array must be aligned on a double-word boundary.          *
*                                                                           *
*                                                                           *
*   NOTES                                                                   *
*      This function is fully interruptible.                                *
*                                                                           *
*   CYCLES                                                                  *
*       num_idcts * 72 + 63                                                 *
*                                                                           *
*       For num_idcts = 6,  cycles = 495                                    *
*                                                                           *
*   CODESIZE                                                                *
*       736 bytes                                                           *
*                                                                           *
* ------------------------------------------------------------------------- *
*             Copyright (c) 2005 Texas Instruments, Incorporated.           *
*                            All Rights Reserved.                           *
* ========================================================================= *


        ; Entry point: _idct_8x8
        ;   A4 (A_data)  - pointer to the coefficient data, transformed in
        ;                  place; must be double-word aligned (see header).
        ;   B4 (B_count) - number of 8x8 IDCT blocks to process (num_idcts).
        ;   B3 (B_ret)   - return address.
        ; NOTE(review): presumably called from C as
        ;   void idct_8x8(short *data, unsigned num_idcts) -- confirm against
        ;   the caller's prototype.
        .text
        .global _idct_8x8
_idct_8x8:
        .asg            A4,         A_data
        .asg            B4,         B_count
        .asg            B3,         B_ret

* ========================================================================= *
*       Horizontal loop
* ========================================================================= *



* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
; Symbolic names for the registers used by the horizontal pass.  An "A_"
; prefix is an A-side register, "B_" a B-side register; the two sides each
; process their own row in parallel.
;
; Several names map to the same physical register (for example B_F10 and
; B_F17 are both B22, A_F76 and A_F04 are both A23).  That sharing relies on
; the hand-written schedule below keeping the values' lifetimes apart, so do
; not change an assignment without re-checking the schedule.

; Loop trip count (copied into ILC before the loop starts).
        .asg            B0,         B_c
; Input (load) and output (store) pointers for each side.
        .asg            A8,         A_i_ptr
        .asg            B9,         B_i_ptr
        .asg            A9,         A_o_ptr
        .asg            B16,        B_o_ptr
; Packed 16-bit cosine constants, named high:low half
; (C71x = c7:c1, C35x = c3:c5, C44x = c4:c4, C62x = c6:c2, C00nx = c0:-c0).
        .asg            A17,        A_C71x
        .asg            B18,        B_C35x
        .asg            A18,        A_C44x
        .asg            B19,        B_C62x
        .asg            A19,        A_C00nx
; Row coefficients as loaded by LDDW (register pairs F76:F54 and F32:F10).
        .asg            B25,        B_F76
        .asg            B24,        B_F54
        .asg            B23,        B_F32
        .asg            B22,        B_F10
        .asg            A23,        A_F76
        .asg            A22,        A_F54
        .asg            A21,        A_F32
        .asg            A20,        A_F10
; Coefficients regrouped by PACK2/PACKH2 into the pairs fed to CMPYR1.
        .asg            B22,        B_F17
        .asg            B21,        B_F53
        .asg            B7,         B_F26
        .asg            B8,         B_F04
        .asg            A20,        A_F17
        .asg            A25,        A_F53
        .asg            A7,         A_F26
        .asg            A23,        A_F04
; CMPYR1 products: Q1S1 = F17 x C71x, Q0S0 = F53 x C35x.
        .asg            B27,        B_Q1S1
        .asg            B26,        B_Q0S0
        .asg            A27,        A_Q1S1
        .asg            A26,        A_Q0S0
; p1p0 = F04 x C44x, r1r0 = F26 x C62x (CMPYR1 products);
; g2h2:q0s0 = ADDSUB2 of Q1S1 and Q0S0 (written over the same registers).
        .asg            B23,        B_p1p0
        .asg            B17,        B_r1r0
        .asg            B27,        B_g2h2
        .asg            B26,        B_q0s0
        .asg            A21,        A_p1p0
        .asg            A16,        A_r1r0
        .asg            A27,        A_g2h2
        .asg            A26,        A_q0s0
; g1g0:h1h0 = ADDSUB2 of p1p0 and r1r0; h3g3 = q0s0 x C00nx (CMPYR1).
        .asg            B29,        B_g1g0
        .asg            B28,        B_h1h0
        .asg            B24,        B_h3g3
        .asg            A29,        A_g1g0
        .asg            A28,        A_h1h0
        .asg            A22,        A_h3g3
; h3h2 / g3g2 are h3g3 and g2h2 re-paired by PACKHL2 / PACKLH2.
; f10s, f23, f67, f54s are the ADD2/SUB2 results of g1g0/h1h0 with
; h3h2/g3g2.
        .asg            B24,        B_h3h2
        .asg            B20,        B_g3g2
        .asg            B30,        B_f10s
        .asg            B31,        B_f23
        .asg            B21,        B_f67
        .asg            B20,        B_f54s
        .asg            A24,        A_h3h2
        .asg            A25,        A_g3g2
        .asg            A30,        A_f10s
        .asg            A31,        A_f23
        .asg            A25,        A_f67
        .asg            A24,        A_f54s
; f76s = f67 with its halfwords swapped (ROTL 16); f32s = f23 swapped
; (SWAP2).  These form the register pairs stored back with STDW.
        .asg            B21,        B_f76s
        .asg            B31,        B_f32s
        .asg            A25,        A_f76s
        .asg            A31,        A_f32s
* ========================================================================= *

        ; 16-bit fixed-point cosine terms.  The magnitudes equal
        ; round(sqrt(2) * cos(k*pi/16) * 2^14) for k = 0..7; cst_c0nx is
        ; the 16-bit two's-complement negation of cst_c0x.
        .asg            0xA57E,     cst_c0nx ;cos term -c0 (scaled by sqrt(2))
        .asg            0x5A82,     cst_c0x  ;cos term  c0 (scaled by sqrt(2))
        .asg            0x58C5,     cst_c1x  ;cos term  c1 (scaled by sqrt(2))
        .asg            0x539F,     cst_c2x  ;cos term  c2 (scaled by sqrt(2))
        .asg            0x4B42,     cst_c3x  ;cos term  c3 (scaled by sqrt(2))
        .asg            0x4000,     cst_c4x  ;cos term  c4 (scaled by sqrt(2))
        .asg            0x3249,     cst_c5x  ;cos term  c5 (scaled by sqrt(2))
        .asg            0x22A3,     cst_c6x  ;cos term  c6 (scaled by sqrt(2))
        .asg            0x11A8,     cst_c7x  ;cos term  c7 (scaled by sqrt(2))

        ; Set-up for the horizontal pass.
        ;   B_c = B_count * 4.  Each loop iteration below handles two rows
        ;   (one on the A side, one on the B side), so four iterations cover
        ;   the eight rows of one block.
        ;   Both input pointers start at the data base; the low half of
        ;   A_C44x is loaded with c4 (it is duplicated into both halves by
        ;   the PACK2 issued alongside SPLOOPD).
        SHL             B_count,    2,          B_c
||      MV              A_data,     A_i_ptr
||      MV              A_data,     B_i_ptr
||      MVKL    .S1     cst_c4x,    A_C44x

        ; The branch predicate reads B_c as it was before this packet's SUB
        ; (instructions in one execute packet see the old register values),
        ; so the return is taken when B_count * 4 == 0.
        ; NOTE(review): the decrement is presumably required by how SPLOOPD
        ; interprets ILC, and the branch's delay slots overlap the SPLOOPD
        ; start-up that follows -- confirm both against the C64x+ CPU guide.
        SUB             B_c,        1,          B_c
||[!B_c] B              B_ret

        ; Horizontal pass: software-pipelined loop (SPLOOP buffer) with an
        ; iteration interval of 6 cycles.  One iteration spans the five
        ; 6-cycle stages marked below and transforms two rows in place: the
        ; A side works on the row at A_i_ptr, the B side on the row 16 bytes
        ; further on, and both pointers advance by 4 double-words (32 bytes)
        ; per iteration.  The trailing ";[c,i]" tags give the cycle and
        ; iteration of each instruction in the single-iteration schedule.
        ;
        ; Packets that contain SPMASK carry one-time set-up instructions
        ; (marked "^"): pointer copies and the MVKL/MVKLH pairs that build
        ; the packed cosine constants.  NOTE(review): per the SPMASK
        ; mechanism these are expected to issue once while the loop is being
        ; loaded rather than on every iteration -- confirm against the
        ; C64x+ CPU and Instruction Set reference.
        SPLOOPD         6
||      MVC             B_c,        ILC
||      ADD             B_i_ptr,    16,         B_i_ptr
||      PACK2           A_C44x,     A_C44x,     A_C44x

*- Stage 0 -----------------------------------------------------------------*
        ; Load the eight 16-bit coefficients of each row as two double-words
        ; (F76:F54 from offset [1], F32:F10 from offset [0] with
        ; post-increment).  The output pointers are initialised from the
        ; same addresses as the input pointers, so results overwrite the
        ; input rows.
        SPMASK
||      LDDW    .D2T2   *+B_i_ptr[1], B_F76:B_F54               ;[ 1,1]
||^     MVD     .M1     A_data,     A_o_ptr
||^     MVD     .M2     B_i_ptr,    B_o_ptr

        SPMASK
||      LDDW    .D1T1   *+A_i_ptr[1], A_F76:A_F54               ;[ 2,1]
||      LDDW    .D2T2   *B_i_ptr++[4], B_F32:B_F10              ;[ 2,1]
||^     MVKL    .S2     cst_c5x,    B_C35x
||^     MVKL    .S1     cst_c0nx,   A_C00nx

        SPMASK
||^     MVKLH   .S1     cst_c0x,    A_C00nx
||^     MVKLH   .S2     cst_c3x,    B_C35x

        LDDW    .D1T1   *A_i_ptr++[4], A_F32:A_F10              ;[ 4,1]

        SPMASK
||^     MVKL    .S1     cst_c1x,    A_C71x
||^     MVKL    .S2     cst_c2x,    B_C62x

        SPMASK
||^     MVKLH   .S1     cst_c7x,    A_C71x
||^     MVKLH   .S2     cst_c6x,    B_C62x

*- Stage 1 -----------------------------------------------------------------*
        ; Regroup the loaded halfwords into the pairs F26, F53, F04 and F17,
        ; then start the CMPYR1 multiplies of those pairs by the packed
        ; cosine constants.  The ".M1X"/".M2X" forms read a constant from
        ; the opposite register file.
        PACK2   .S2     B_F32,      B_F76,      B_F26           ;[ 7,1]
||      PACKH2  .L2     B_F54,      B_F32,      B_F53           ;[ 7,1]

        PACK2   .S2     B_F10,      B_F54,      B_F04           ;[ 8,1]
||      CMPYR1  .M2     B_F53,      B_C35x,     B_Q0S0          ;[ 8,1]

        PACK2   .S1     A_F32,      A_F76,      A_F26           ;[ 9,1]
||      PACKH2  .L1     A_F54,      A_F32,      A_F53           ;[ 9,1]
||      CMPYR1  .M2     B_F26,      B_C62x,     B_r1r0          ;[ 9,1]

        PACK2   .S1     A_F10,      A_F54,      A_F04           ;[10,1]
||      PACKH2  .L1     A_F10,      A_F76,      A_F17           ;[10,1]
||      PACKH2  .S2     B_F10,      B_F76,      B_F17           ;[10,1]
||      CMPYR1  .M2X    B_F04,      A_C44x,     B_p1p0          ;[10,1]
||      CMPYR1  .M1X    A_F53,      B_C35x,     A_Q0S0          ;[10,1]

        CMPYR1  .M1X    A_F26,      B_C62x,     A_r1r0          ;[11,1]

        CMPYR1  .M1     A_F04,      A_C44x,     A_p1p0          ;[12,1]
||      CMPYR1  .M2X    B_F17,      A_C71x,     B_Q1S1          ;[12,1]

*- Stage 2 -----------------------------------------------------------------*
        ; Combine the products: ADDSUB2 forms packed sums and differences
        ; into a register pair, and q0s0 is multiplied by the c0:-c0
        ; constant to give h3g3.  The NOPs are part of the cycle-exact
        ; schedule; do not remove them.
        NOP             1

        CMPYR1  .M1     A_F17,      A_C71x,     A_Q1S1          ;[14,1]

        NOP             1

        ADDSUB2 .L2     B_Q1S1,     B_Q0S0,     B_g2h2:B_q0s0   ;[16,1]

        CMPYR1  .M2X    B_q0s0,     A_C00nx,    B_h3g3          ;[17,1]

        ADDSUB2 .L2     B_p1p0,     B_r1r0,     B_g1g0:B_h1h0   ;[18,1]
||      ADDSUB2 .L1     A_Q1S1,     A_Q0S0,     A_g2h2:A_q0s0   ;[18,1]

*- Stage 3 -----------------------------------------------------------------*
        ; Re-pair h3g3/g2h2 into h3h2 and g3g2, then form the outputs with
        ; packed adds and subtracts against g1g0 and h1h0.
        CMPYR1  .M1     A_q0s0,     A_C00nx,    A_h3g3          ;[19,1]

        ADDSUB2 .L1     A_p1p0,     A_r1r0,     A_g1g0:A_h1h0   ;[20,1]

        PACKLH2 .L2     B_h3g3,     B_g2h2,     B_g3g2          ;[21,1]
||      PACKHL2 .S2     B_h3g3,     B_g2h2,     B_h3h2          ;[21,1]

        SUB2    .D2     B_g1g0,     B_h3h2,     B_f67           ;[22,1]

        PACKLH2 .L1     A_h3g3,     A_g2h2,     A_g3g2          ;[23,1]
||      PACKHL2 .S1     A_h3g3,     A_g2h2,     A_h3h2          ;[23,1]
||      ADD2    .S2     B_h1h0,     B_g3g2,     B_f23           ;[23,1]
||      ADD2    .L2     B_g1g0,     B_h3h2,     B_f10s          ;[23,1]

        ADD2    .S1     A_g1g0,     A_h3h2,     A_f10s          ;[24,1]
||      SUB2    .D2     B_h1h0,     B_g3g2,     B_f54s          ;[24,1]

*- Stage 4 -----------------------------------------------------------------*
        ; Swap the halfwords of f67 and f23 (ROTL by 16 / SWAP2) so the
        ; pairs f76s:f54s and f32s:f10s are in storage order, then store
        ; each row back with two STDWs, post-incrementing the output
        ; pointer by 4 double-words to match the input pointer.
        SUB2    .S1     A_h1h0,     A_g3g2,     A_f54s          ;[25,1]
||      SUB2    .L1     A_g1g0,     A_h3h2,     A_f67           ;[25,1]
||      ADD2    .D1     A_h1h0,     A_g3g2,     A_f23           ;[25,1]
||      ROTL    .M2     B_f67,      16,         B_f76s          ;[25,1]

        SWAP2   .S1     A_f23,      A_f32s                      ;[26,1]
||      SWAP2   .L2     B_f23,      B_f32s                      ;[26,1]

        STDW    .D2T2   B_f76s:B_f54s, *+B_o_ptr[1]             ;[27,1]
||      ROTL    .M1     A_f67,      16,         A_f76s          ;[27,1]

        NOP             1

        STDW    .D1T1   A_f76s:A_f54s, *+A_o_ptr[1]             ;[29,1]
||      STDW    .D2T2   B_f32s:B_f10s, *B_o_ptr++[4]            ;[29,1]

        ; SPKERNEL closes the loop body.  NOTE(review): its two operands
        ; are understood to select the epilog stage and cycle at which the
        ; code after the loop may start issuing -- confirm before changing
        ; them, since the code that follows is scheduled against them.
        SPKERNEL        3, 5
||      STDW    .D1T1   A_f32s:A_f10s, *A_o_ptr++[4]            ;[30,1]


* ========================================================================= *
*       Vertical loop
* ========================================================================= *


* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
        .asg            B21,        B_i_ptr
        .asg            A21,        A_o_ptr
        .asg            B20,        B_k_fix
        .asg            A22,        A_k_fix
        .asg            A17,        A_C71x
        .asg            B18,        B_C35x
        .asg            A18,        A_C44x
        .asg            B19,        B_C62x
        .asg            A19,        A_C00nx
        .asg            A23,        A_rnd3
        .asg            B22,        B_fx1
        .asg            A9,         A_fx2
        .asg            B16,        B_F77
        .asg            B8,         B_F66
        .asg            B25,        B_F55
        .asg            B23,        B_F44
        .asg            B17,        B_F33
        .asg            B9,         B_F22
        .asg            B25,        B_F11
        .asg            B16,        B_F00
        .asg            B29,        B_F17x
        .asg            B28,        B_F17
        .asg            B25,        B_F53x

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -