📄 inverse discrete cosine transform.txt

📁 c6000的应用程序比较常用比如说fft ifft等一些原文件
💻 TXT
📖 第 1 页 / 共 5 页
字号:
||      ADD     .D1     A_i,        1,          A_i             ;[19,1] 
||      SHR     .S1     A_x3,       trunc1,     A_x3t           ;[19,1] 
||      ADD     .L1X    A_g3a,      B_g3b,      A_g3            ;[19,1] 
||      ADD     .S2X    A_X2c2,     B_X6c6,     B_r0            ;[ 9,2] 
||      MPYH    .M1     A_X3X2,     A_c3c1,     A_X3c3          ;[ 9,2] 
||      MPYHL   .M2     B_X5X4,     B_c7c5,     B_X5c5          ;[ 9,2] 

; h_loop_1 -- one execute packet of the software-pipelined horizontal-pass
; kernel (every "||" line issues in parallel with the first instruction).
; The trailing ;[s,i] tags look like (schedule slot, iteration in flight)
; markers -- TODO confirm against the loop's scheduling notes.
;   * B_x6 = B_g1 + B_h3n;  A_x2 = A_h1 + A_g3;  A_x5 = A_h1 - A_g3.
;   * A_x3t is stored through B_o_ptr, which is then post-decremented by
;     one halfword; the store only happens while A1 == 0.
;   * A_p0 = A_P0 + B_P1 (cross-path add).
;   * MPYHL = (high half of src1) * (low half of src2): X1*c5 and X7*c1.
h_loop_1:
        ADD     .L2     B_g1,       B_h3n,      B_x6            ;[20,1] 
||[!A1] STH     .D2T1   A_x3t,      * B_o_ptr--[1]              ;[20,1] 
||      ADD     .S1     A_h1,       A_g3,       A_x2            ;[20,1] 
||      SUB     .D1     A_h1,       A_g3,       A_x5            ;[20,1] 
||      ADD     .L1X    A_P0,       B_P1,       A_p0            ;[10,2] 
||      MPYHL   .M1     A_X1X0,     A_c7c5,     A_X1c5          ;[10,2] 
||      MPYHL   .M2     B_X7X6,     B_c3c1,     B_X7c1          ;[10,2] 

; h_loop_2 -- kernel execute packet.
;   * A_x5 and B_x1 are shifted right by trunc1 to form the values that
;     later packets store (A_x5t, B_x1t).
;   * Partial sums: A_h2a = X1c1 + X3c3,  B_h2b = X5c5 + X7c7.
;   * MPYH = (high half) * (high half): X1*c3 and X5*c7.
;   * Loads for the newest iteration: A_X1X0 from *A_i_ptr (pointer then
;     post-decremented by 4 words) and B_X7X6 from word offset +1 of
;     B_i_ptr (pointer not modified).
h_loop_2:
        SHR     .S1     A_x5,       trunc1,     A_x5t           ;[21,1] 
||      SHR     .S2     B_x1,       trunc1,     B_x1t           ;[21,1] 
||      ADD     .L1     A_X1c1,     A_X3c3,     A_h2a           ;[11,2] 
||      ADD     .L2     B_X5c5,     B_X7c7,     B_h2b           ;[11,2] 
||      MPYH    .M1     A_X1X0,     A_c3c1,     A_X1c3          ;[11,2] 
||      MPYH    .M2     B_X5X4,     B_c7c5,     B_X5c7          ;[11,2] 
||      LDW     .D1T1   * A_i_ptr--[4],         A_X1X0          ;[ 1,3] 
||      LDW     .D2T2   *+B_i_ptr[1],           B_X7X6          ;[ 1,3] 

; h_loop_3 -- kernel execute packet.
;   * B_x6 and A_x2 are shifted right by trunc1 (-> B_x6t, A_x2t).
;   * A_h0 = A_p0 - B_r0;  B_h2 = A_h2a + B_h2b (both use a cross path).
;   * MPYH (high * high): X3*c7 and X5*c3.
;   * Loads for the newest iteration: A_X3X2 from word offset +5 of
;     A_i_ptr (pointer not modified) and B_X5X4 from *B_i_ptr (pointer
;     then post-decremented by 4 words).
h_loop_3:
        SHR     .S2     B_x6,       trunc1,     B_x6t           ;[22,1] 
||      SHR     .S1     A_x2,       trunc1,     A_x2t           ;[22,1] 
||      SUB     .L1X    A_p0,       B_r0,       A_h0            ;[12,2] 
||      ADD     .L2X    A_h2a,      B_h2b,      B_h2            ;[12,2] 
||      MPYH    .M1     A_X3X2,     A_c7c5,     A_X3c7          ;[12,2] 
||      MPYH    .M2     B_X5X4,     B_c3c1,     B_X5c3          ;[12,2] 
||      LDW     .D1T1   *+A_i_ptr[5],           A_X3X2          ;[ 2,3] 
||      LDW     .D2T2   * B_i_ptr--[4],         B_X5X4          ;[ 2,3] 

; h_loop_4 -- kernel execute packet that issues the loop branch.
;   * [B_o] B h_loop: taken while B_o is non-zero.  A C6000 branch has
;     five delay slots, so the packets h_loop_5..h_loop_9 still execute
;     before control reaches h_loop.
;   * [B_o] SUB: B_o is decremented only while non-zero, so it stops at 0.
;   * [!A1] AND A_i,7: once A1 is zero, A_i is kept in the range 0..7;
;     A_i == 0 later triggers the [!A_i] pointer adjustments.
;   * A_x5t is stored at halfword offset +8 from A_o_ptr.
;   * A_x4 >> trunc1 -> A_x4t;  B_g0 = A_p0 + B_r0.
;   * MPYHL (high * low): X3*c5 and X5*c1.
h_loop_4:
  [ B_o]B       .S2     h_loop                                  ;[23,1] 
||      STH     .D1T1   A_x5t,      *+A_o_ptr[8]                ;[23,1] 
||      SHR     .S1     A_x4,       trunc1,     A_x4t           ;[23,1] 
||      ADD     .L2X    A_p0,       B_r0,       B_g0            ;[13,2] 
||[ B_o]SUB     .D2     B_o,        1,          B_o             ;[13,2] 
||[!A1] AND     .L1     A_i,        7,          A_i             ;[13,2] 
||      MPYHL   .M1     A_X3X2,     A_c7c5,     A_X3c5          ;[13,2] 
||      MPYHL   .M2     B_X5X4,     B_c3c1,     B_X5c1          ;[13,2] 

h_loop_5:
  [!A1] STH     .D1T1   A_x4t,      * A_o_ptr--[1]              ;[24,1] 
||      SUB     .S1     A_X1c3,     A_X3c7,     A_h3a           ;[14,2] 
||      SUB     .L1X    A_P0,       B_P1,       A_p1            ;[14,2] 
||      ADD     .S2     B_g0,       B_h2,       B_x0            ;[14,2] 
||      SUB     .L2     B_X5c3,     B_X7c1,     B_g2b           ;[14,2] 
||      MPYHL   .M1     A_X3X2,     A_c3c1,     A_X3c1          ;[14,2] 
||      MPY     .M2     B_X7X6,     B_c6c2,     B_X6c2          ;[14,2] 

; h_loop_6 -- kernel execute packet (second delay slot of the loop branch).
;   * B_x6t is stored at halfword offset +17 from A_o_ptr.
;   * B_x7 = B_g0 - B_h2;  B_x0 >> trunc1 -> B_x0t;
;     A_g2a = X1c7 - X3c5; B_h3b = X5c1 + X7c5.
;   * MPYLH (low * high), cross path: X2*c6.  MPYH (high * high): X7*c3.
;   * [ A1] ADD A1,1,A1: A1 is incremented only while it is non-zero.
;     NOTE(review): for the [!A1] stores in this loop ever to fire, A1
;     presumably starts negative and counts up to zero; its
;     initialisation is outside this view -- confirm.
h_loop_6:
        STH     .D1T2   B_x6t,      *+A_o_ptr[17]               ;[25,1] 
||      SUB     .D2     B_g0,       B_h2,       B_x7            ;[15,2] 
||      SHR     .S2     B_x0,       trunc1,     B_x0t           ;[15,2] 
||      SUB     .S1     A_X1c7,     A_X3c5,     A_g2a           ;[15,2] 
||      ADD     .L2     B_X5c1,     B_X7c5,     B_h3b           ;[15,2] 
||      MPYLH   .M1X    A_X3X2,     B_c6c2,     A_X2c6          ;[15,2] 
||      MPYH    .M2     B_X7X6,     B_c3c1,     B_X7c3          ;[15,2] 
||[ A1] ADD     .L1     A1,         1,          A1

; h_loop_7 -- kernel execute packet (third delay slot of the loop branch).
;   * [!A_i] SUBAW: when A_i == 0, A_o_ptr is moved back by 28 words.
;   * B_x1t is stored at halfword offset -15 from B_o_ptr.
;   * B_x7 >> trunc1 -> B_x7t;  A_g3a = X1c5 - X3c1;
;     B_h3n = B_h3b - A_h3a;    A_g2 = A_g2a + B_g2b.
;   * First multiplies of the newest iteration: MPYH X1*c7 and
;     MPYLH (low * high) X6*c6.
h_loop_7:
  [!A_i]SUBAW   .D1     A_o_ptr,    28,         A_o_ptr         ;[26,1] 
||      STH     .D2T2   B_x1t,      *-B_o_ptr[15]               ;[26,1] 
||      SHR     .S2     B_x7,       trunc1,     B_x7t           ;[16,2] 
||      SUB     .L1     A_X1c5,     A_X3c1,     A_g3a           ;[16,2] 
||      SUB     .L2X    B_h3b,      A_h3a,      B_h3n           ;[16,2] 
||      ADD     .S1X    A_g2a,      B_g2b,      A_g2            ;[16,2] 
||      MPYH    .M1     A_X1X0,     A_c7c5,     A_X1c7          ;[ 6,3] 
||      MPYLH   .M2     B_X7X6,     B_c6c2,     B_X6c6          ;[ 6,3] 

; h_loop_8 -- kernel execute packet (fourth delay slot of the loop branch).
;   * A_x2t is stored at halfword offset -7 from B_o_ptr.
;   * A_x3 = A_h0 + A_g2;  A_x4 = A_h0 - A_g2;  B_r1 = X2c6 - X6c2.
;   * EXT extracts a sign-extended bit field, selected by kq_a/kq_b
;     (defined outside this view), from A_X1X0 into A_P0 and from
;     B_X5X4 into B_P1 -- presumably the X0 and X4 halves; confirm.
;   * MPY (low * low), cross path: X2*c2.  MPYHL (high * low): X7*c5.
h_loop_8:
        STH     .D2T1   A_x2t,      *-B_o_ptr[7]                ;[27,1] 
||      ADD     .L1     A_h0,       A_g2,       A_x3            ;[17,2] 
||      SUB     .D1     A_h0,       A_g2,       A_x4            ;[17,2] 
||      SUB     .L2X    A_X2c6,     B_X6c2,     B_r1            ;[17,2] 
||      EXT     .S1     A_X1X0,     kq_a, kq_b, A_P0            ;[ 7,3] 
||      EXT     .S2     B_X5X4,     kq_a, kq_b, B_P1            ;[ 7,3] 
||      MPY     .M1X    A_X3X2,     B_c6c2,     A_X2c2          ;[ 7,3] 
||      MPYHL   .M2     B_X7X6,     B_c7c5,     B_X7c5          ;[ 7,3] 

; h_loop_9 -- last kernel execute packet; it is the fifth (final) delay
; slot of the branch issued in h_loop_4, so a taken branch resumes at
; h_loop right after this packet.
;   * [!A_i] SUBAW: when A_i == 0, B_o_ptr is moved back by 28 words.
;   * B_x7t is stored at halfword offset +24 from A_o_ptr.
;   * B_g1 = A_p1 + B_r1;  A_h1 = A_p1 - B_r1;  B_g3b = X5c7 + X7c3.
;   * ADDK adds the constant 256 to A_P0 (presumably a rounding term --
;     confirm against the algorithm description).
;   * MPYHL (high * low): X1*c1.  MPYH (high * high): X7*c7.
h_loop_9:
  [!A_i]SUBAW   .D2     B_o_ptr,    28,         B_o_ptr         ;[28,1] 
||      STH     .D1T2   B_x7t,      *+A_o_ptr[24]               ;[18,2] 
||      ADD     .S2X    A_p1,       B_r1,       B_g1            ;[18,2] 
||      SUB     .L1X    A_p1,       B_r1,       A_h1            ;[18,2] 
||      ADD     .L2     B_X5c7,     B_X7c3,     B_g3b           ;[18,2] 
||      ADDK    .S1     256,        A_P0                        ;[ 8,3] 
||      MPYHL   .M1     A_X1X0,     A_c3c1,     A_X1c1          ;[ 8,3] 
||      MPYH    .M2     B_X7X6,     B_c7c5,     B_X7c7          ;[ 8,3] 

; ============================ PIPE LOOP EPILOG ==============================
; h_epilog -- drains the pipeline after the last pass through the kernel.
; Only the remaining adds, shifts and stores of the final iteration
; (tagged iteration 3) are issued; no new loads or multiplies start.
; Unlike the kernel, the stores here are unconditional and never modify
; the output pointers: they use fixed halfword offsets instead (for
; example B_x1t goes to *-B_o_ptr[16] here, versus *-B_o_ptr[15] in the
; kernel, where B_o_ptr has already been post-decremented).
; The drain continues in the interloop code that follows.
h_epilog:
        SUB     .L2     B_g1,       B_h3n,      B_x1            ;[19,3] 
||      STH     .D2T2   B_x0t,      *-B_o_ptr[24]               ;[19,3] 
||      SHR     .S1     A_x3,       trunc1,     A_x3t           ;[19,3] 
||      ADD     .L1X    A_g3a,      B_g3b,      A_g3            ;[19,3] 

        ADD     .L2     B_g1,       B_h3n,      B_x6            ;[20,3] 
||      STH     .D2T1   A_x3t,      *+B_o_ptr[0]                ;[20,3] 
||      ADD     .S1     A_h1,       A_g3,       A_x2            ;[20,3] 
||      SUB     .D1     A_h1,       A_g3,       A_x5            ;[20,3] 
;-
        SHR     .S1     A_x5,       trunc1,     A_x5t           ;[21,3] 
||      SHR     .S2     B_x1,       trunc1,     B_x1t           ;[21,3] 

        SHR     .S2     B_x6,       trunc1,     B_x6t           ;[22,3] 
||      SHR     .S1     A_x2,       trunc1,     A_x2t           ;[22,3] 
||      STH     .D2T2   B_x1t,      *-B_o_ptr[16]               ;[26,3] 

        STH     .D1T1   A_x5t,      *+A_o_ptr[8]                ;[23,3] 
||      SHR     .S1     A_x4,       trunc1,     A_x4t           ;[23,3] 

* ========================================================================= *
*   Interloop code:  Performs remaining epilog from horizontal pass, and    *
*   begins setup of the vertical pass.                                      *
*                                                                           *
*   In order to save some time between loops, I start performing pointer    *
*   fixups and constant initializations in the epilog of the horizontal     *
*   pass loop.  The horizontal pass works from the bottom of the            *
*   IDCT list and ends at the top, whereas the vertical pass works from     *
*   the top of the list and ends up at the bottom.  As a result, the        *
*   displacement between the required pointer settings between the two      *
*   loops is fixed, regardless of the number of IDCTs processed, since      *
*   the two loops' pointers always meet at the top of the list.             *
*                                                                           *
*   The vertical loop needs a new repacking of the cosine terms: c6c3 and   *
*   c2c1.  By playing around with how the cosine terms are packed,          *
*   I was able to save two whole registers in the vertical loop and thus    *
*   fit into the register file.  I do this repacking partly here, and       *
*   partly in the vertical loop's prolog.                                   *
* ========================================================================= *

; Interloop code: issues the last three horizontal-pass stores (A_x4t,
; B_x6t, A_x2t) while starting the vertical-pass setup in the otherwise
; idle slots of the same packets.
;   * ADDK adds fixed byte displacements (168 and 156) to A_i_ptr and
;     B_i_ptr to reposition them for the vertical loop.
;   * A_c6c3 is bound to A15 here, ahead of the full vertical-loop
;     register map, because it is written immediately below.
;   * SHR A_c3c1,16 moves the upper halfword of A_c3c1 into the lower
;     halfword of A_c6c3 (arithmetic shift).
;   * MVC copies control register IRP into B0.  Per the original comment
;     this recovers the stack pointer; the code that parked it in IRP is
;     outside this view -- confirm.
        STH     .D1T1   A_x4t,      *+A_o_ptr[0]                ;[24,3] 
;-
        STH     .D1T2   B_x6t,      *+A_o_ptr[16]               ;[25,3] 
||      ADDK    .S1     168,        A_i_ptr     ; Fixup for vert loop
||      ADDK    .S2     156,        B_i_ptr     ; Fixup for vert loop

        .asg            A15,        A_c6c3      ; Symbolic name from vert loop

        STH     .D2T1   A_x2t,      *-B_o_ptr[8]                ;[27,3] 
||      SHR     .S1     A_c3c1,     16, A_c6c3  ; Set up new cosine constant
||      MVC     .S2     IRP,        B0          ; Get SP so we can unspill A_o

; ============================================================================

; =============== SYMBOLIC REGISTER ASSIGNMENTS FOR VERT LOOP ================
; Each .asg binds a symbolic name to a physical register for the vertical
; loop; these bindings replace any earlier ones for the same names (for
; example A_i_ptr, A_o_ptr, B_c7c5).
; Several registers are deliberately given more than one name in this
; list (e.g. A4 = A_c1c4 / A_X6c2, A8 = A_X5X4 / A_p1, B0 = B_X2c2 /
; B_g0 / B_X3c1 / B_X3c3 / B_X3c5), so those values share a register and
; must not be live at the same time.  Any change to this map has to be
; checked against the vertical loop's schedule.
; A_c6c3 -> A15 repeats the binding already made in the interloop code.
        .asg            A14,        A_i_ptr ; Input pointer #1
        .asg            B15,        B_i_ptr ; Input pointer #2
        .asg            A11,        A_o_ptr ; Output pointer #1
        .asg            B11,        B_o_ptr ; Output pointer #2
        .asg            B13,        B_c7c5  ; Cosine terms c7, c5   (packed)
        .asg            A13,        A_c7c5  ; Cosine terms c7, c5   (packed)
        .asg            A15,        A_c6c3  ; Cosine terms c6, c3   (packed)
        .asg            B12,        B_c2c1  ; Cosine terms c2, c1   (packed)
        .asg            A4,         A_c1c4  ; Cosine term  c1, c4 (alternates)
        .asg            A2,         A_o     ; Outer loop counter
        .asg            B2,         B_i     ; Inner loop counter
        .asg            A12,        A_X7X6  ; Incoming coefs X7, X6 (packed)
        .asg            A8,         A_X5X4  ; Incoming coefs X5, X4 (packed)
        .asg            B10,        B_X3X2  ; Incoming coefs X3, X2 (packed)
        .asg            B14,        B_X1X0  ; Incoming coefs X1, X0 (packed)
        .asg            B9,         B_rnd   ; Rounding value applied to P0
        .asg            B1,         B_P0_t  ; Node P0, temporary pre-rounding
        .asg            B5,         B_P0    ; Rounded value of Node P0
        .asg            A7,         A_P1    ; Node P1 in signal flow graph
        .asg            B0,         B_X2c2  ; X2 * c2
        .asg            B4,         B_X2c6  ; X2 * c6
        .asg            A4,         A_X6c2  ; X6 * c2
        .asg            A3,         A_X6c6  ; X6 * c6
        .asg            A5,         A_p0    ; Node p0 in signal flow graph
        .asg            A8,         A_p1    ; Node p1 in signal flow graph
        .asg            B4,         B_r1    ; Node r1 in signal flow graph
        .asg            B3,         B_r0    ; Node r0 in signal flow graph
        .asg            B0,         B_g0    ; Node g0 in signal flow graph
        .asg            A1,         A_g1    ; Node g1 in signal flow graph
        .asg            B3,         B_h1    ; Node h1 in signal flow graph
        .asg            A3,         A_h0    ; Node h0 in signal flow graph
        .asg            B5,         B_X1c1  ; X1 * c1
        .asg            B1,         B_X1c3  ; X1 * c3
        .asg            B3,         B_X1c5  ; X1 * c5
        .asg            B8,         B_X1c7  ; X1 * c7
        .asg            B0,         B_X3c1  ; X3 * c1
        .asg            B0,         B_X3c3  ; X3 * c3
        .asg            B0,         B_X3c5  ; X3 * c5
        .asg            B9,         B_X3c7  ; X3 * c7
        .asg            A3,         A_X5c1  ; X5 * c1
        .asg            A1,         A_X5c3  ; X5 * c3
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -