⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fft16x32.asm

📁 davinci技术 源码 视频监控汇编源码
💻 ASM
📖 第 1 页 / 共 4 页
字号:
||      NORM    .L2     B_n,        B_radix2                    ;
||      MV      .S1X    B_SP,       A_SP                        ; Twin Stack Pointer


        ; Spill A13:A12 and B_ret, derive A_fft_jmp_3 = B_fft_jmp >> 4, copy
        ; B_i into B_i_temp (the value later moved into ILC), preset
        ; A_radix_temp to 4 and reduce B_radix2 to its bit 0.
        ; STW scales its bracketed offset by 4 bytes and STDW by 8, so the
        ; B_SP[3] word slot written here (byte offset 12) is distinct from the
        ; B_SP[3] doubleword slot (byte offset 24) written by the next packet.
        STDW    .D1T1   A13:A12,    *+A_SP[6]
||      STW     .D2T2   B_ret,      *+B_SP[3]                   ; return
||      SHRU    .S1X    B_fft_jmp,  4,          A_fft_jmp_3
||      MV      .S2     B_i,        B_i_temp                    ;
||      MVK     .L1     4,          A_radix_temp                ;
||      AND     .L2     B_radix2,   1,          B_radix2        ;


        ; Spill A11:A10 and B11:B10.  If bit 0 of B_radix2 is set, override
        ; A_radix_temp with 2.  Copy A_ptr_x to B_x and pre-decrement the trip
        ; count held in B_i_temp.
        STDW    .D1T1   A11:A10,    *+A_SP[4]                   ; Save A11,A10
||      STDW    .D2T2   B11:B10,    *+B_SP[3]                   ; Save B11,B10
||[B_radix2] MVK.S1     2,          A_radix_temp                ; A23
||      MV      .S2X    A_ptr_x,    B_x                         ; B4       ; Restore A_ptr_x in B_x
||      SUB     .L2     B_i_temp,   1,          B_i_temp        ; Compensate for SPLOOPD


        ; Open the software-pipelined loop with an initiation interval of 10
        ; cycles, predicated on B_wh.
        ; NOTE(review): B_wh is not written anywhere in the visible code before
        ; this point - presumably it is initialised earlier; confirm.
        ; In parallel: save A_ptr_x and B_ptr_y, scale A_radix_temp by 4, bias
        ; B_x by -16 bytes (the first "*++B_x[2]" doubleword pre-increment in
        ; stage 0 adds 16 back) and load the trip count into ILC.
  [B_wh] SPLOOPD        10
||      STW     .D1T1   A_ptr_x,    *+A_SP[1]                   ; Save A_ptr_x
||      STW     .D2T2   B_ptr_y,    *+B_SP[2]                   ; Save B_ptr_y
||      SHL     .S1     A_radix_temp, 2,        A_radix_temp    ; Adjust A_radix;
||      ADD     .L2     B_x,        -16,        B_x             ;
||      MVC     .S2     B_i_temp,   ILC

* =========================== STAGE 0 ============================ *
* Each SPMASK packet below mixes loop-body instructions (tagged ;[cycle,iter])
* with one-time setup instructions marked '^'.  Under the C64x+ SPLOOP rules
* the '^' instructions execute from program memory here but are masked out of
* the loop buffer, so they are not repeated on later iterations.
* NOTE(review): confirm against the SPLOOP chapter of the CPU reference guide.

* Cycle 1: A_x = B_x and first doubleword load through B_x.
* One-time: save A15:A14, clear B_j, B_h2 = B_stride_temp >> 2.
        SPMASK
||      MV      .S1X    B_x,        A_x                         ;[ 1,1]
||      LDDW    .D2T2   *++B_x[2],  B_x_1:B_x_0                 ;[ 1,1]
||^     STDW    .D1T1   A15:A14,    *+A_SP[2]                   ; Save A15,A14
||^     ZERO    .L2     B_j
||^     SHRU    .S2     B_stride_temp, 2,       B_h2            ;

* One-time only: A_h2 = B_h2, B_stride = B_stride_temp,
* B_l1 = B_stride_temp >> 1, save B13:B12.
        SPMASK
||^     MV      .D1X    B_h2,       A_h2                        ;
||^     MV      .L2     B_stride_temp, B_stride
||^     SHRU    .S2     B_stride_temp, 1,       B_l1
||^     STDW    .D2T2   B13:B12,    *+B_SP[5]                   ;

* Cycle 3: load the A-side doubleword and the B-side element at index B_h2.
* One-time: B_wh = (B_stride > A_radix_temp, unsigned), B_l2 = B_l1 + B_h2.
        SPMASK
||      LDDW    .D1T1   *++A_x[3],  A_x_3:A_x_2                 ;[ 3,1]
||      LDDW    .D2T2   *B_x[B_h2], B_xh2_1i:B_xh2_0i           ;[ 3,1]
||^     CMPGTU  .L2X    B_stride,   A_radix_temp, B_wh
||^     ADD     .S2     B_l1,       B_h2,       B_l2            ;

* Cycle 4: ROTL by 0 acts as a register copy (B_x -> A_x_) on the .M1 unit;
* loads at A_x advanced by A_h2 and at B_x[B_l1].
* One-time: A_fft_jmp_3 -= 3, A_radix = A_radix_temp.
        SPMASK
||      ROTL    .M1X    B_x,        0,          A_x_            ;[ 4,1]
||      LDDW    .D1T1   *++A_x[A_h2], A_xh2_3i:A_xh2_2i         ;[ 4,1]
||      LDDW    .D2T2   *B_x[B_l1], B_xl1_1i:B_xl1_0i           ;[ 4,1]
||^     SUB     .S1     A_fft_jmp_3, 3,         A_fft_jmp_3     ;
||^     MV      .L1     A_radix_temp, A_radix

* Cycle 5: A_j = B_j + 1.
* One-time: B_fft_jmp >>= 2, save A_radix at A_SP[15].
        SPMASK
||      ADD     .S1X    B_j,        1,          A_j             ;[ 5,1]
||^     SHRU    .S2     B_fft_jmp,  2,          B_fft_jmp       ;
||^     STW     .D1T1   A_radix,    *+A_SP[15]

* Cycle 6: B_ifj = B_j - A_fft_jmp_3 (zero when B_j has reached A_fft_jmp_3).
* One-time: A_tw_offset = B_fft_jmp.
        SPMASK
||      SUB     .S2X    B_j,        A_fft_jmp_3, B_ifj          ;[ 6,1]
||^     MV      .L1X    B_fft_jmp,  A_tw_offset


* Cycle 7: loads at A_x advanced by A_h2 and at B_x[B_l2].
* One-time: B_fft_jmp <<= 2.
        SPMASK
||      LDDW    .D1T1   *++A_x[A_h2], A_xl1_3i:A_xl1_2i         ;[ 7,1]
||      LDDW    .D2T2   *B_x[B_l2], B_xl2_1i:B_xl2_0i           ;[ 7,1]
||^     SHL     .S2     B_fft_jmp,  2,          B_fft_jmp       ;

* Cycle 8: when B_ifj == 0, advance B_x by B_fft_jmp.
* One-time: B_w0 = A_w0.
        SPMASK
||[!B_ifj] ADD  .L2     B_x,        B_fft_jmp,  B_x             ;[ 8,1]
||^     MV      .D2X    A_w0,       B_w0

* Cycle 9: first ADDSUB on the B side, A_j += 1, and loads from the tables
* addressed by A_w0[A_j] and B_w0[B_j] (presumably twiddle factors, going by
* the co*/si* register names).
* One-time: B_stride >>= 2.
        SPMASK
||      ADDSUB  .L2     B_x_0,      B_xl1_0i,   B_xh0_0:B_xl0_0 ;[ 9,1]
||      ADD     .S1     A_j,        1,          A_j             ;[ 9,1]
||      LDDW    .D1T1   *A_w0[A_j], A_co11_si11:A_co30_si30     ;[ 9,1]
||      LDDW    .D2T2   *B_w0[B_j], B_co20_si20:B_co10_si10     ;[ 9,1]
||^     SHRU    .S2     B_stride,   2,          B_stride        ;


* Cycle 10 (no SPMASK, pure loop body): B_j += 3, second B-side ADDSUB and
* the last A-side input load.
        ADD     .S2     B_j,        3,          B_j             ;[10,1]
||      ADDSUB  .L2     B_x_1,      B_xl1_1i,   B_xh1_0:B_xl1_0 ;[10,1]
||      LDDW    .D1T1   *++A_x[A_h2], A_xl2_3i:A_xl2_2i         ;[10,1]

* =========================== STAGE 1 ============================ *
* Loop-body cycles 11-20: sum/difference terms are formed with ADDSUB/ADD/SUB
* and then multiplied by the loaded co/si pairs with MPY2IR.  Each MPY2IR
* multiplies the two packed 16-bit halves of its first operand by the 32-bit
* second operand and writes a register pair of products.


* When B_ifj == 0, reset B_j by multiplying it by 0 on the .M2 unit.
  [!B_ifj] MPYSU.M2     0,          B_j,        B_j             ;[11,1]

        ADDSUB  .L2     B_xh2_1i,   B_xl2_1i,   B_xh21_0:B_xl21_0;[12,1]
||      ADDSUB  .L1     A_x_2,      A_xl1_2i,   A_xh0_1:A_xl0_1 ;[12,1]
||      LDDW    .D1T1   *A_w0[A_j], A_co31_si31:A_co21_si21     ;[12,1]

* ROTL by 0 is again used as a copy (A_x_ -> A_x___) on the .M1 unit.
        SUB     .S2     B_xh1_0,    B_xh21_0,   B_yt0_0         ;[13,1]
||      ADDSUB  .L2     B_xh2_0i,   B_xl2_0i,   B_xh20_0:B_xl20_0;[13,1]
||      ADDSUB  .L1     A_x_3,      A_xl1_3i,   A_xh1_1:A_xl1_1 ;[13,1]
||      ROTL    .M1     A_x_,       0,          A_x___          ;[13,1]


        ADDSUB  .L2     B_xl0_0,    B_xl21_0,   B_xt1_0:B_xt2_0 ;[14,1]
||      SUB     .S2     B_xh0_0,    B_xh20_0,   B_xt0_0         ;[14,1]
||      MPY2IR  .M2     B_co20_si20, B_yt0_0,   B_pa:B_p8       ;[14,1]

* B_x_0o (and B_x_1o in the next packet) are plain sums that are stored
* without any multiplication in stage 2.
        ADDSUB  .L2     B_xl1_0,    B_xl20_0,   B_yt2_0:B_yt1_0 ;[15,1]
||      ADD     .S2     B_xh20_0,   B_xh0_0,    B_x_0o          ;[15,1]
||      ADDSUB  .L1     A_xh2_2i,   A_xl2_2i,   A_xh20_1:A_xl20_1;[15,1]
||      MPY2IR  .M2     B_co10_si10, B_xt1_0,   B_p1:B_p3       ;[15,1]

        SUB     .S1     A_xh0_1,    A_xh20_1,   A_xt0_1         ;[16,1]
||      ADD     .L2     B_xh21_0,   B_xh1_0,    B_x_1o          ;[16,1]
||      ADDSUB  .L1     A_xh2_3i,   A_xl2_3i,   A_xh21_1:A_xl21_1;[16,1]
||      MPY2IR  .M2     B_co20_si20, B_xt0_0,   B_p9:B_pb       ;[16,1]

        ADDSUB  .L1     A_xl0_1,    A_xl21_1,   A_xt1_1:A_xt2_1 ;[17,1]
||      SUB     .S1     A_xh1_1,    A_xh21_1,   A_yt0_1         ;[17,1]
||      MPY2IR  .M1     A_co21_si21, A_xt0_1,   A_pd:A_pf       ;[17,1]
||      MPY2IR  .M2     B_co10_si10, B_yt1_0,   B_p2:B_p0       ;[17,1]

* MVD copies A_x___ to B_x__ through the .M2 unit; B_x__ is the B-side store
* pointer used in stage 2.
        ADD     .L1     A_xh21_1,   A_xh1_1,    A_x_3o          ;[18,1]
||      ADD     .S1     A_xh20_1,   A_xh0_1,    A_x_2o          ;[18,1]
||      MPY2IR  .M1     A_co21_si21, A_yt0_1,   A_pe:A_pc       ;[18,1]
||      MVD     .M2X    A_x___,     B_x__                       ;[18,1]

* The co30/si30 pair was loaded on the A side; copy it across for the .M2
* multiplies that follow.
        MV      .S2X    A_co30_si30, B_co30_si30                ;[19,1]
||      ADDSUB  .L1     A_xl1_1,    A_xl20_1,   A_yt2_1:A_yt1_1 ;[19,1]
||      MPY2IR  .M1     A_co11_si11, A_xt1_1,   A_p5:A_p7       ;[19,1]


        ADD     .D2     B_p8,       B_p9,       B_xl1_0o        ;[20,1]
||      MPY2IR  .M2     B_co30_si30, B_xt2_0,   B_p11:B_p13     ;[20,1]
||      MPY2IR  .M1     A_co11_si11, A_yt1_1,   A_p6:A_p4       ;[20,1]

* =========================== STAGE 2 ============================ *
* Loop-body cycles 21-30: pairs of products are combined with ADD/SUB into
* the output values, which are stored through B_x__ (B side) and A_x__
* (A side).  After the first store on each side the pointers are
* pre-incremented by B_h2 / A_h2 doublewords per store.


* MVD copies A_x___ to A_x__, the A-side store pointer.
        SUB     .L2     B_p2,       B_p3,       B_xh2_1o        ;[21,1]
||      ADD     .S2     B_p0,       B_p1,       B_xh2_0o        ;[21,1]
||      MVD     .M1     A_x___,     A_x__                       ;[21,1]

* First B-side store goes to offset 0 of B_x__ (no pointer update).
        STDW    .D2T2   B_x_1o:B_x_0o, *B_x__[0]                ;[22,1]
||      SUB     .S1     A_pe,       A_pf,       A_xl1_3o        ;[22,1]
||      SUB     .S2     B_pa,       B_pb,       B_xl1_1o        ;[22,1]
||      MPY2IR  .M2     B_co30_si30, B_yt2_0,   B_p12:B_p10     ;[22,1]

        ADD     .S1     A_pc,       A_pd,       A_xl1_2o        ;[23,1]

        SUB     .L1     A_p6,       A_p7,       A_xh2_3o        ;[24,1]
||      ADD     .S1     A_p4,       A_p5,       A_xh2_2o        ;[24,1]

* First A-side store pre-increments A_x__ by one doubleword.
        STDW    .D2T2   B_xh2_1o:B_xh2_0o, *++B_x__[B_h2]       ;[25,1]
||      STDW    .D1T1   A_x_3o:A_x_2o, *++A_x__[1]              ;[25,1]
||      MPY2IR  .M1     A_co31_si31, A_yt2_1,   A_p16:A_p14     ;[25,1]

        STDW    .D2T2   B_xl1_1o:B_xl1_0o, *++B_x__[B_h2]       ;[26,1]
||      STDW    .D1T1   A_xh2_3o:A_xh2_2o, *++A_x__[A_h2]       ;[26,1]
||      MPY2IR  .M1     A_co31_si31, A_xt2_1,   A_p15:A_p17     ;[26,1]

        SUB     .S2     B_p12,      B_p13,      B_xl2_1o        ;[27,1]
||      ADD     .L2     B_p10,      B_p11,      B_xl2_0o        ;[27,1]


        STDW    .D2T2   B_xl2_1o:B_xl2_0o, *++B_x__[B_h2]       ;[28,1]
||      STDW    .D1T1   A_xl1_3o:A_xl1_2o, *++A_x__[A_h2]       ;[28,1]

        NOP

        SUB     .S1     A_p16,      A_p17,      A_xl2_3o        ;[30,1]
||      ADD     .L1     A_p14,      A_p15,      A_xl2_2o        ;[30,1]

* =========================== STAGE 3 ============================ *
* Cycle 31: SPKERNEL closes the SPLOOPD body; the final A-side store of the
* iteration is issued in the same execute packet.

        SPKERNEL        0, 0
||      STDW    .D1T1   A_xl2_3o:A_xl2_2o, *++A_x__[A_h2]       ;[31,1]

* =========================== END STAGE 3 ============================ *



LOOP_WHILE:
* ============ STAGES I,II,III (epilog) + Outer Loop  ======================= *
* Recomputes the per-pass parameters from B_stride after the SPKERNEL above.
* According to the section title these packets overlap with the draining
* stages of the pipelined loop, so the instruction order and unit assignment
* here are timing-sensitive.

* Reload B_x from stack word slot 1, the slot A_ptr_x was saved to before the
* loop (A_SP mirrors B_SP, see the "Twin Stack Pointer" move).
        LDW     .D2T2   *+B_SP[1],  B_x                         ; Restore A_ptr_x in B_x

        NOP

* B_fft_jmp = 6 * B_stride (MPYSU uses the low 16 bits of B_stride); B_j = 0.
        MPYSU   .M2     6,          B_stride,   B_fft_jmp
||      ZERO    .D2     B_j

* Keep the previous B_h2: it is overwritten below and B_h2_old is the index
* used by the '^' replacement stores in the next section.
        MV      .D2     B_h2,       B_h2_old                    ;

* A_fft_jmp_3 = B_fft_jmp >> 4.
        SHRU    .S1X    B_fft_jmp,  4,          A_fft_jmp_3

* B_l1 = B_stride >> 1.
        SHRU    .S2     B_stride,   1,          B_l1

* A_w0 = A_ptr_w + A_tw_offset halfwords (ADDAH scales the offset by 2 bytes).
        ADDAH   .D1     A_ptr_w,    A_tw_offset, A_w0

* B_h2 = B_stride >> 2.
        SHRU    .S2     B_stride,   2,          B_h2


* B_w0 = A_w0; B_l2 = B_l1 + B_h2.
        MV      .D2X    A_w0,       B_w0
||      ADD     .L2     B_l1,       B_h2,       B_l2

* No instruction in this packet carries '^', so nothing is masked here.
* NOTE(review): per the C64x+ SPLOOP documentation SPMASKR also requests a
* reload of the loop buffer for the next pass - confirm against the CPU
* reference guide.
* In parallel: A_fft_jmp_3 -= 3, B_fft_jmp >>= 2, and B_x is biased by -16
* bytes exactly as in the SPLOOPD packet of the first pass.
        SPMASKR
||      SUB     .D1     A_fft_jmp_3, 3,         A_fft_jmp_3
||      SHRU    .S2     B_fft_jmp,  2,          B_fft_jmp
||      ADD     .L2     B_x,        -16,        B_x


* ====== STAGE 0 (prolog) +STAGES II,III (epilog) + Outer Loop  ====== *
* The '^' STDW instructions in the SPMASK packets below repeat the stage 2/3
* stores of the loop body but index with B_h2_old / A_h2_old.
* NOTE(review): presumably they replace the loop-buffer copies of those
* stores while the previous pass drains, because B_h2 and A_h2 are updated
* for the next pass in this same region - confirm against the SPLOOP
* documentation.

* A_h2 still holds the previous pass's value here (it is overwritten two
* packets further down), so this store uses A_h2 directly.
        SPMASK
||^     STDW    .D1T1   A_xl2_3o:A_xl2_2o, *++A_x__[A_h2]       ;[0]

* A_tw_offset += B_fft_jmp; remember the previous A_h2 in A_h2_old.
        ADD     .L1X    A_tw_offset, B_fft_jmp, A_tw_offset
||      MV      .D1     A_h2,       A_h2_old

* B_fft_jmp <<= 2; A_h2 = B_h2 (the value recomputed under LOOP_WHILE).
        SHL     .S2     B_fft_jmp,  2,          B_fft_jmp
||      MV      .L1X    B_h2,       A_h2

        NOP

        SPMASK
||^     STDW    .D2T2   B_xh2_1o:B_xh2_0o, *++B_x__[B_h2_old]   ;[4]

        SPMASK
||^     STDW    .D2T2   B_xl1_1o:B_xl1_0o, *++B_x__[B_h2_old]   ;[5]
||^     STDW    .D1T1   A_xh2_3o:A_xh2_2o, *++A_x__[A_h2_old]   ;[26,1]

        NOP

        SPMASK
||^     STDW    .D2T2   B_xl2_1o:B_xl2_0o, *++B_x__[B_h2_old]   ;[7]
||^     STDW    .D1T1   A_xl1_3o:A_xl1_2o, *++A_x__[A_h2_old]   ;[28,1]

* NOTE(review): this shifts B_stride LEFT by 2, whereas the one-time setup in
* stage 0 of the first pass shifts it RIGHT by 2.  The remainder of the
* outer-loop update lies outside this chunk - confirm the intended direction
* against the following code.
        SHL     .S2     B_stride,   2,          B_stride        ;

        NOP


* ====== STAGE 0,I (prolog) +STAGES III (epilog) + Outer Loop  ====== *

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -