📄 discrete cosine transform.txt

📁 c6000的标准函数库
💻 TXT
📖 第 1 页 / 共 5 页
字号:
        .asg            A10,        A_F5t   ; Truncated result for F5 (same reg as A_F6t)
        .asg            A10,        A_F6t   ; Truncated result for F6 (same reg as A_F5t)
        .asg            B5,         B_F7t   ; Truncated result for F7
        .asg            B2,         B_i     ; B-side inner loop counter (prolog sets 16)
        .asg            A1,         A_i     ; A-side inner loop counter (prolog sets 4)
        .asg            B1,         B_o     ; Outer loop counter (num_fdcts*8 - 1 at entry)
        .asg            A2,         A_c     ; Prolog collapse counter (gates [!A_c] ops)
; ========================================================================= ;


* ========================================================================= *
*   Initialization code / Stack Management                                  *
*                                                                           *
*   This code is responsible for saving registers to the stack, disabling   *
*   interrupts, and setting up for the vertical loop.                       *
*                                                                           *
*   This function requires 16 words of stack.  A10...A15, B10...B14, B3,    *
*   CSR, IRP, and the loop count derived from 'num_fdcts' are all pushed    *
*   on the stack.  For speed, this code uses twin stack pointers to         *
*   offload registers onto the stack as quickly as possible.                *
*                                                                           *
*   The majority of the code in this function is not interruptible.         *
*   Therefore, interrupts are disabled almost immediately after entry       *
*   into the function, and the previous interruptibility state is restored  *
*   on exit.  The previous value of CSR is pushed on the stack and          *
*   restored on exit.                                                       *
*                                                                           *
*   Since all 32 registers are used by the vertical loop, the stack         *
*   pointer is saved in the IRP register.  The previous contents of IRP     *
*   are also pushed on the stack.                                           *
*                                                                           *
*   Initialization for constants (cosine terms, etc.) is overlapped with    *
*   the prolog of the vertical loop to save time.  Pointer setup for the    *
*   output pointer is also hidden in the prolog.                            *
*                                                                           *
*   Early exit code suppresses most of the function's activity (including   *
*   most of the stack accesses) if num_fdcts (in B4) is zero.  It is not    *
*   possible to exit the function faster.                                   *
* ========================================================================= *

;-
; Entry packet: allocate the 16-word frame and copy the incoming SP to A15.
; Per the SP[n] notes below, *-A15[k] addresses the same word as *+B15[16-k],
; so .D1 (via A15) and .D2 (via B15) can each store one register per cycle.
        STW     .D2T1   A15,        * B15--[16]     ; Save A15, get stk frame
||      MV      .L1X    B15,        A15             ; Twin Stack Pointer
||      SHL     .S2     B4,         3,          B_o ; iters == num_fdcts * 8

; From here up to "Branch Occurs" every instruction is predicated on B_o:
; [ B_o] is the normal path; [!B_o] is the num_fdcts == 0 early exit, which
; pops the frame while reloading A15 and returns through B3.
  [ B_o]STW     .D1T2   B14,        *-A15  [ 2]     ; Save B14 (SP[14])
||[ B_o]ADD     .L2     B_o,        -1,         B_o ; Adj. for parallel iters
||[ B_o]ADDK    .S1     48,         A_i_ptr         ; Point to row 3, col 0
||[!B_o]B       .S2     B3                          ; Abort if num_fdcts == 0
||[!B_o]LDW     .D2T1   *++B15[16], A15             ; Restore A15 on abort
; ===== Interrupts masked by branch delay slots =====
; The five execute packets below fill the delay slots of the branch above.
; CSR and IRP are snapshotted on the B side, then copied to the A side so
; they can be stored through A15.
;-
  [ B_o]STW     .D1T1   A13,        *-A15  [ 3]     ; Save A13 (SP[13])
||[ B_o]STW     .D2T2   B11,        *+B15  [ 8]     ; Save B11 (SP[ 8])
||[ B_o]MVC     .S2     CSR,        B0              ; Snapshot CSR

  [ B_o]STW     .D1T1   A12,        *-A15  [ 5]     ; Save A12 (SP[11])
||[ B_o]STW     .D2T2   B12,        *+B15  [10]     ; Save B12 (SP[10])

  [ B_o]STW     .D1T2   B13,        *-A15  [ 7]     ; Save B13 (SP[ 9])
||[ B_o]STW     .D2T1   A11,        *+B15  [12]     ; Save A11 (SP[12])
||[ B_o]MVC     .S2     IRP,        B5              ; Snapshot IRP
;-
  [ B_o]STW     .D1T1   A10,        *-A15  [ 9]     ; Save A10 (SP[ 7])
||[ B_o]STW     .D2T2   B10,        *+B15  [ 6]     ; Save B10 (SP[ 6])
||[ B_o]AND     .L2     B0,         -2,         B2  ; B2 = CSR with GIE (bit 0) clear
||[ B_o]MV      .L1X    B5,         A1              ; IRP copy to A side for .D1 store

  [ B_o]STW     .D2T2   B3,         *+B15  [ 5]     ; Save return address (SP[ 5])
||[ B_o]STW     .D1T1   A1,         *-A15  [12]     ; Save IRP (SP[ 4])
||[ B_o]MV      .L1X    B0,         A0              ; CSR copy to A side (stored in prolog)
||[ B_o]MVC     .S2     B2,         CSR             ; Mask interrupts
; ===== Branch Occurs (early exit only; otherwise fall into the prolog) =====
;-
; =========================== PIPE LOOP PROLOG ============================ ;
; Issues the first iteration's loads while finishing the stack saves and
; building the constant registers.  A_i_ptr enters pointing at row 3, col 0
; (the entry code added 48 bytes), so the halfword offsets below pick rows
; 0..7 of one column into f0..f7.  Cosine terms are packed two per register:
; MVKL writes the low half, MVKLH the high half.
; NOTE(review): the ;[c,i] tags look like [cycle within iteration,
; iteration number] from the pipelining schedule -- confirm against the
; full listing.
        LDH     .D1T1   *+A_i_ptr  [ 8],        A_f4            ;[ 1,1]
||      MVC     .S2     B15,        IRP             ; Save Stack Pointer

        LDH     .D1T2   *-A_i_ptr  [16],        B_f1            ;[ 2,1]
||      MVK     .S1     4,          A_i             ; A-side inner loop counter = 4

        LDH     .D1T1   * A_i_ptr++[ 1],        A_f3            ;[ 3,1]
||      MVKL    .S1     cst_c7,     A_c1c7          ; Cosine term C7

        LDH     .D1T1   *-A_i_ptr  [25],        A_f0            ;[ 4,1]
||      MVKL    .S1     cst_c0,     A_k1c0          ; Cosine term C0
;-
        LDH     .D1T2   *+A_i_ptr  [15],        B_f5            ;[ 5,1]
||      MVKL    .S1     cst_c6,     A_c2c6          ; Cosine term C6
||      MVKL    .S2     cst_c6,     B_c2c6          ; Cosine term C6
||      MV      .L2X    A_c1c7,     B_c1c7          ; Twin constant register

        LDH     .D1T2   *-A_i_ptr  [ 9],        B_f2            ;[ 6,1]
||      MVKLH   .S1     cst_c2,     A_c2c6          ; Cosine term C2
||      SUB     .L1     A_i,        2,          A_c ; Prolog collapse cnt = 2
||      ADD     .L2X    A_i_ptr,    -2,         B_o_ptr ; Out ptr = A_i_ptr before ++[1]
;-
        LDH     .D1T2   *+A_i_ptr  [23],        B_f6            ;[ 7,1]
||      MVKLH   .S2     cst_c1,     B_c1c7          ; Cosine term C1

        LDH     .D1T1   *+A_i_ptr  [31],        A_f7            ;[ 8,1]
||      MVKLH   .S1     cst_c1,     A_c1c7          ; Cosine term C1
||      MVKLH   .S2     cst_c2,     B_c2c6          ; Cosine term C2

        MVKL    .S2     cst_c5,     B_c3c5          ; Cosine term C5
||      MVKLH   .S1     1,          A_k1c0          ; Constant: 0x0001
||      STW     .D2T1   A14,        *+B15  [15]     ; Save A14 (SP[15])
;-
        SUB     .L1     A_f3,       A_f4,       A_q1            ;[ 9,1] q1=g2
||      ADD     .S1     A_f3,       A_f4,       A_h0            ;[10,1]
||      MVKLH   .S2     cst_c3,     B_c3c5          ; Cosine term C3
||      STW     .D2T2   B_o,        *+B15  [ 3]     ; Spill horiz loop count (SP[ 3])
||      STW     .D1T1   A0,         *-A15  [14]     ; Save CSR (SP[ 2])

; Second iteration's first load is issued before entering the kernel.
        LDH     .D1T1   *+A_i_ptr  [ 8],        A_f4            ;[ 1,2]
||      MV      .L2X    A_k1c0,     B_k1c0          ; Twin constant register
||      MVK     .S2     16,         B_i             ; B-side inner loop counter = 16
;-
; =========================== PIPE LOOP KERNEL ============================ ;
v_loop:
        SHR     .S1     A_F6,       13,         A_F6t           ;[22,1]
||      MPY     .M2     B_S0,       B_c3c5,     B_c5S0          ;[22,1]
||      MPY     .M1X    A_Q0,       B_c3c5,     A_c5Q0          ;[22,1]
||      ADD     .D2     B_f1,       B_f6,       B_g1            ;[12,2]
||      SUB     .S2     B_f2,       B_f5,       B_g3            ;[12,2]
||      SUB     .L2     B_f1,       B_f6,       B_h3            ;[12,2]
||      LDH     .D1T2   *-A_i_ptr  [16],        B_f1            ;[ 2,3]
||[ A_c]ADD     .L1     A_c,        -1,         A_c             ;pro. collapse
;-
v_loop_1:
  [!A_c]STH     .D2T2   B_F4,       *+B_o_ptr  [ 8]             ;[23,1]
||      MPY     .M2     B_S1,       B_c1c7,     B_c7S1          ;[23,1]
||      MPYLH   .M1X    A_Q0,       B_c3c5,     A_c3Q0          ;[23,1]
||      ADD     .L2     B_h3,       B_g3,       B_s0a           ;[13,2]
||      SUB     .S2     B_h3,       B_g3,       B_q0a           ;[13,2]
||      SUB     .S1     A_f0,       A_f7,       A_s1            ;[13,2] s1=h2
||      ADD     .L1     A_f0,       A_f7,       A_g0            ;[13,2]
||      LDH     .D1T1   * A_i_ptr++[ 1],        A_f3            ;[ 3,3]
;-
v_loop_2:
  [!A_c]STH     .D2T2   B_F0,       *-B_o_ptr  [24]             ;[24,1]
||      SUB     .S2X    B_c3S0,     A_c5Q0,     B_F3            ;[24,1]
||      MPYLH   .M2     B_S1,       B_c1c7,     B_c1S1          ;[24,1]
||      ADD     .L2     B_f2,       B_f5,       B_h1            ;[14,2]
||      SUB     .S1     A_g0,       A_h0,       A_r0            ;[14,2]
||      ADD     .L1     A_g0,       A_h0,       A_p0            ;[14,2]
||      MPYSU   .M1X    B_q0a,      A_k1c0,     A_q0b           ;[14,2]
||      LDH     .D1T1   *-A_i_ptr  [25],        A_f0            ;[ 4,3]
;-
v_loop_3:
  [!A_c]SHR     .S1     A_F2,       13,         A_F2t           ;[25,1]
||[!A_c]MPY     .M1     A_i,        4,          A_i             ;[25,1]
||      SHR     .S2     B_F3,       13,         B_F3t           ;[25,1]
||      SUB     .L2X    B_c7S1,     A_c1Q1,     B_F7            ;[25,1]
||      ADD     .L1X    A_c3Q0,     B_c5S0,     A_F5            ;[25,1]
||      SUB     .D2     B_g1,       B_h1,       B_r1            ;[15,2]
||      MPYSU   .M2     B_s0a,      B_k1c0,     B_s0b           ;[15,2]
||      LDH     .D1T2   *+A_i_ptr  [15],        B_f5            ;[ 5,3]
;-
v_loop_4:
        ADD     .L1X    A_c7Q1,     B_c1S1,     A_F1            ;[26,1]
||[ B_o]B       .S2     v_loop                                  ;[26,1]
||[!A_c]STH     .D2T1   A_F6t,      *+B_o_ptr  [24]             ;[26,1]
||      ADD     .L2     B_g1,       B_h1,       B_p1            ;[16,2]
||      ADDK    .S1     07FFFh,     A_q0b                       ;[16,2]
||      MPY     .M1     A_r0,       A_c2c6,     A_c6r0          ;[16,2]
||      MPY     .M2     B_i,        4,          B_i             ;[ 6,3]
||      LDH     .D1T2   *-A_i_ptr  [ 9],        B_f2            ;[ 6,3]
;-
v_loop_5:
  [!A_c]STH     .D2T1   A_F2t,      *-B_o_ptr  [ 8]             ;[27,1]
||      SHR     .S1     A_F5,       13,         A_F5t           ;[27,1]
||      MPY     .M2     B_r1,       B_c2c6,     B_c6r1          ;[17,2]
||      SUB     .L2X    A_p0,       B_p1,       B_F4            ;[17,2]
||      ADDK    .S2     07FFFh,     B_s0b                       ;[17,2]
||      MPYH    .M1     A_q0b,      A_k1c0,     A_q0            ;[17,2]
||      LDH     .D1T2   *+A_i_ptr  [23],        B_f6            ;[ 7,3]
;-
v_loop_6:
  [!A_c]STH     .D2T2   B_F3t,      * B_o_ptr++[ 1]             ;[28,1]
||      SHR     .S1     A_F1,       13,         A_F1t           ;[28,1]
||      ADD     .L2X    A_p0,       B_p1,       B_F0            ;[18,2]
||      MPYLH   .M1     A_r0,       A_c2c6,     A_c2r0          ;[18,2]
||      MPYH    .M2     B_s0b,      B_k1c0,     B_s0            ;[18,2]
||[ B_o]SUB     .S2     B_o,        1,          B_o             ;[18,2]
||      LDH     .D1T1   *+A_i_ptr  [31],        A_f7            ;[ 8,3]
v_loop_7:
;-
        SHR     .S2     B_F7,       13,         B_F7t           ;[29,1]
||[!A_c]STH     .D2T1   A_F5t,      *+B_o_ptr  [15]             ;[29,1]
||      MPYLH   .M2     B_r1,       B_c2c6,     B_c2r1          ;[19,2]
||      SUB     .L1     A_q1,       A_q0,       A_Q0            ;[19,2]
||      ADD     .D1     A_q1,       A_q0,       A_Q1            ;[19,2]
||      SUB     .S1     A_f3,       A_f4,       A_q1            ;[ 9,3] q1=g2
;-
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -