⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fft32x32.asm

📁 davinci技术 源码 视频监控汇编源码
💻 ASM
📖 第 1 页 / 共 4 页
字号:
||      MV      .D1     A_ptr_x,    A_x                         ; Restore A_ptr_x in A_x
||      STW     .D2T2   B_n,        *+B_SP[1]

        ; Prologue: three execute packets that spill A10-A15 and B10-B13 to the
        ; stack while setting up the radix, stride and twiddle-pointer values.
        STDW    .D1T1   A11:A10,    *+A_SP[5]                   ; Save A11,A10
||      STDW    .D2T2   B13:B12,    *+B_SP[6]                   ; Save B13,B12
||      AND     .L2     B_radix2,   1,          B_radix2        ; Keep only bit 0 of B_radix2
||      MV      .L1     A_ptr_w,    A_w0                        ; Copy A_ptr_w into A_w0
||      MVK     .S1     4,          A_radix                     ; Default A_radix = 4
||      MVC     .S2     B_i,        RILC                        ; Write B_i to the RILC control register

        STDW    .D1T1   A13:A12,    *+A_SP[7]                   ; Save A13,A12
||      STDW    .D2T2   B11:B10,    *+B_SP[4]                   ; Save B11,B10
||[B_radix2] MVK.S1     2,          A_radix                     ; Override: A_radix = 2 when B_radix2 != 0
||      MV      .L2     B_n,        B_stride                    ; B_stride = B_n
||      ADD     .L1     A_x,        -16,        A_x             ; Bias A_x by -16 bytes; the first LDDW *++A_x[2] pre-increments by 16


        STDW    .D2T1   A15:A14,    *+B_SP[3]                   ; Save A15,A14
||      MVK     .S2     1,          B_wh                        ; B_wh = 1 (predicate of the SPLOOP that follows)
||      MPYSU   .M2     6,          B_stride,   B_fft_jmp       ; B_fft_jmp = 6 * B_stride
||      ADDAH   .D1     A_w0,       A_tw_offset, A_w0           ; A_w0 += A_tw_offset halfwords
||      MV      .L1     A_ptr_x,    A11                         ; Hold A_ptr_x in A11 (stored later to *+A_SP[2])




  [B_wh] SPLOOP         12                                      ; Begin software-pipelined loop, iteration interval 12
||      ZERO    .L1     A_j                                     ; A_j = 0
||      STW     .D2T2   B_ret,      *+B_SP[4]                   ; Save B_ret on the stack
||      SHRU    .S2     B_stride,   2,          B_h2            ; B_h2 = B_stride >> 2
||      MV      .L2     B_SP,       B11                         ; B11 = B_SP (base for the B_ptr_y store below)
||      STW     .D1T1   A_ptr_w,    *+A_SP[0]                   ; Save A_ptr_w (reloaded into A_w0 after LOOP_WHILE)
* =========================== STAGE 0 ============================ *
        SPMASK
||      ADDAD   .D1     A_w0,       A_j,        A_w             ;[ 1,1]
||^     MV      .S2X    A_radix,    B_radix
||^     SHRU    .S1X    B_fft_jmp,  3,          A_fft_jmp
||^     STW     .D2T2   B_ptr_y,    *+B11[3]                    ; Save B_ptr_y

        SPMASK
||^     MV      .D1     B_h2,       A_h2
||^     SHL     .S2     B_radix,    2,          B_radix         ; Adjust B_radix

        SPMASK
||      MV      .S2X    A_x,        B_x                         ;[ 3,1]
||^     STW     .D1T1   A11,        *+A_SP[2]


        SPMASK
||      LDDW    .D1T2   *++A_x[2],  B_x_1:B_x_0                 ;[ 4,1]
||^     SHRU    .S2     B_stride,   2,          B_stride
||^     MV      .L1     A_x,        A_y

        SHL     .S2     B_h2,       4,          B_2h2           ;[ 5,1]
||      LDDW    .D1T2   *++A_x[A_h2], B_xh2_1i:B_xh2_0i         ;[ 5,1]
||      LDDW    .D2T1   *++B_x[3],  A_x_3:A_x_2                 ;[ 5,1]

        SPMASK
||      LDDW    .D1T2   *++A_x[A_h2], B_xl1_1i:B_xl1_0i         ;[ 6,1]
||      LDDW    .D2T1   *++B_x[B_h2], A_xh2_3i:A_xh2_2i         ;[ 6,1]
||^     SHRU    .S2     B_fft_jmp,  1,          B_fft_jmp
||^     ZERO    .L2     B_j

        SPMASK
||      ADD     .S1     6,          A_j,        A_j             ;[ 7,1]
||      LDDW    .D1T2   *A_x[A_h2], B_xl2_1i:B_xl2_0i           ;[ 7,1]
||      LDDW    .D2T1   *++B_x[B_h2], A_xl1_3i:A_xl1_2i         ;[ 7,1]
||^     ADD     .L1X    A_tw_offset, B_fft_jmp, A_tw_offset

        SUB     .S1X    A_x,        B_2h2,      A_x             ;[ 8,1]
||      LDDW    .D2T1   *B_x[B_h2], A_xl2_3i:A_xl2_2i           ;[ 8,1]

        SPMASK
||      SUB     .L1     A_j,        A_fft_jmp,  A_ifj           ;[ 9,1]
||^     SHL     .S2     B_fft_jmp,  1,          B_fft_jmp

        MV      .S2X    A_w,        B_w                         ;[10,1]
||      LDDW    .D1T2   *A_w[0],    B_co10:B_si10               ;[10,1]

        ADDSUB  .L2     B_x_0,      B_xl1_0i,   B_xh0_0:B_xl0_0 ;[11,1]
||[!A_ifj] ADD  .S1X    A_x,        B_fft_jmp,  A_x             ;[11,1]
||      LDDW    .D2T1   *B_w[3],    A_co11:A_si11               ;[11,1]
||      LDDW    .D1T2   *A_w[2],    B_co30:B_si30               ;[11,1]

        ADDSUB  .L1     A_x_2,      A_xl1_2i,   A_xh0_1:A_xl0_1 ;[12,1]
||      ADDSUB  .L2     B_x_1,      B_xl1_1i,   B_xh1_0:B_xl1_0 ;[12,1]
||[!A_ifj] ZERO .S1     A_j                                     ;[12,1]
||      LDDW    .D2T1   *B_w[5],    A_co31:A_si31               ;[12,1]
||      LDDW    .D1T2   *A_w[4],    B_co21:B_si21               ;[12,1]


* =========================== STAGE 1 ============================ *


        ADDSUB  .L1     A_xh2_2i,   A_xl2_2i,   A_xh20_1:A_xl20_1;[13,1]
||      ADDSUB  .L2     B_xh2_1i,   B_xl2_1i,   B_xh21_0:B_xl21_0;[13,1]
||      LDDW    .D2T1   *B_w[1],    A_co20:A_si20               ;[13,1]

        ADD     .D2     6,          B_j,        B_j             ;[14,1]
||      ADDSUB  .L2     B_xl0_0,    B_xl21_0,   B_xt1_0:B_xt2_0 ;[14,1]
||      ADDSUB  .L1     A_xh2_3i,   A_xl2_3i,   A_xh21_1:A_xl21_1;[14,1]

        ADDSUB  .L2     B_xh2_0i,   B_xl2_0i,   B_xh20_0:B_xl20_0;[15,1]
||      ADDSUB  .L1     A_x_3,      A_xl1_3i,   A_xh1_1:A_xl1_1 ;[15,1]
||      SMPY32  .M2     B_si10,     B_xt1_0,    B_p3            ;[15,1]

        ADDSUB  .L1     A_xl0_1,    A_xl21_1,   A_xt1_1:A_xt2_1 ;[16,1]
||      ADDSUB  .L2     B_xl1_0,    B_xl20_0,   B_yt2_0:B_yt1_0 ;[16,1]
||      SUB     .S1     A_xh1_1,    A_xh21_1,   A_yt0_1         ;[16,1]
||      SMPY32  .M2     B_co10,     B_xt1_0,    B_p1            ;[16,1]

        ADDSUB  .L1     A_xl1_1,    A_xl20_1,   A_yt2_1:A_yt1_1 ;[17,1]
||      SUB     .L2     B_xh0_0,    B_xh20_0,   B_xt0_0         ;[17,1]
||      ADD     .S1     A_xh0_1,    A_xh20_1,   A_x_2o          ;[17,1]
||      SMPY32  .M1     A_co11,     A_xt1_1,    A_p5            ;[17,1]
||      SMPY32  .M2     B_co10,     B_yt1_0,    B_p2            ;[17,1]

        SUB     .S1     A_xh0_1,    A_xh20_1,   A_xt0_1         ;[18,1]
||      ADD     .L2     B_xh0_0,    B_xh20_0,   B_x_0o          ;[18,1]
||      SMPY32  .M1     A_si11,     A_yt1_1,    A_p4            ;[18,1]
||      SMPY32  .M2     B_si10,     B_yt1_0,    B_p0            ;[18,1]

        ADD     .S2     B_xh1_0,    B_xh21_0,   B_x_1o          ;[19,1]
||      SMPY32  .M2     B_si30,     B_xt2_0,    B_p13           ;[19,1]
||      SMPY32  .M1     A_si11,     A_xt1_1,    A_p7            ;[19,1]

        SUB     .S2     B_xh1_0,    B_xh21_0,   B_yt0_0         ;[20,1]
||      SMPY32  .M1     A_si31,     A_xt2_1,    A_p17           ;[20,1]
||      SMPY32  .M2X    A_co20,     B_xt0_0,    B_p9            ;[20,1]

        SUB     .S2     B_p2,       B_p3,       B_xh2_1o        ;[21,1]
||      SMPY32  .M2X    B_si21,     A_xt0_1,    B_pf            ;[21,1]
||      SMPY32  .M1     A_co11,     A_yt1_1,    A_p6            ;[21,1]

        ADD     .L1     A_p4,       A_p5,       A_xh2_2o        ;[22,1]
||      ADD     .L2     B_p0,       B_p1,       B_xh2_0o        ;[22,1]
||      ADD     .S1     A_xh1_1,    A_xh21_1,   A_x_3o          ;[22,1]
||      SMPY32  .M2     B_si30,     B_yt2_0,    B_p10           ;[22,1]
||      SMPY32  .M1X    A_si20,     B_yt0_0,    A_p8            ;[22,1]

        SMPY32  .M1     A_co31,     A_yt2_1,    A_p16           ;[23,1]
||      SMPY32  .M2X    A_co20,     B_yt0_0,    B_pa            ;[23,1]

        SUB     .S2X    B_j,        A_fft_jmp,  B_ifj           ;[24,1]
||      SMPY32  .M1     A_si31,     A_yt2_1,    A_p14           ;[24,1]
||      SMPY32  .M2     B_co30,     B_xt2_0,    B_p11           ;[24,1]

* =========================== STAGE 2 ============================ *


  [!B_ifj] ZERO .S2     B_j                                     ;[25,1]
||      SUB     .S1     A_p6,       A_p7,       A_xh2_3o        ;[25,1]
||      SMPY32  .M2X    B_co21,     A_xt0_1,    B_pd            ;[25,1]
||      SMPY32  .M1X    B_si21,     A_yt0_1,    A_pc            ;[25,1]

        SHL     .S1     A_h2,       4,          A_2h2           ;[26,1]
||      STDW    .D1T2   B_x_1o:B_x_0o, *++A_y[2]                ;[26,1]
||      MV      .S2X    A_y,        B_y                         ;[26,1]
||      SMPY32  .M2     B_co30,     B_yt2_0,    B_p12           ;[26,1]
||      SMPY32  .M1X    A_si20,     B_xt0_0,    A_pb            ;[26,1]

        STDW    .D1T2   B_xh2_1o:B_xh2_0o, *++A_y[A_h2]         ;[27,1]
||      STDW    .D2T1   A_x_3o:A_x_2o, *++B_y[3]                ;[27,1]
||      SUB     .S1     A_p16,      A_p17,      A_xl2_3o        ;[27,1]
||      SMPY32  .M1X    B_co21,     A_yt0_1,    A_pe            ;[27,1]

        STDW    .D2T1   A_xh2_3o:A_xh2_2o, *++B_y[B_h2]         ;[28,1]
||      ADD     .S2X    A_p8,       B_p9,       B_xl1_0o        ;[28,1]
||      SMPY32  .M1     A_co31,     A_xt2_1,    A_p15           ;[28,1]

        NOP

        SUB     .S2     B_p12,      B_p13,      B_xl2_1o        ;[30,1]
||      ADD     .L1X    A_pc,       B_pd,       A_xl1_2o        ;[30,1]

        SUB     .L1X    A_pe,       B_pf,       A_xl1_3o        ;[31,1]
||      SUB     .L2X    B_pa,       A_pb,       B_xl1_1o        ;[31,1]

        STDW    .D1T2   B_xl1_1o:B_xl1_0o, *++A_y[A_h2]         ;[32,1]
||      ADD     .L1     A_p14,      A_p15,      A_xl2_2o        ;[32,1]
||      ADD     .L2     B_p10,      B_p11,      B_xl2_0o        ;[32,1]


        SUB     .S1     A_y,        A_2h2,      A_y             ;[33,1]
||      STDW    .D1T2   B_xl2_1o:B_xl2_0o, *A_y[A_h2]           ;[33,1]
||      STDW    .D2T1   A_xl1_3o:A_xl1_2o, *++B_y[B_h2]         ;[33,1]

        STDW    .D2T1   A_xl2_3o:A_xl2_2o, *B_y[B_h2]           ;[34,1]
        SPKERNEL        0, 0
||[!B_ifj] ADD  .L1X    A_y,        B_fft_jmp,  A_y             ;[35,1]

* =========================== END STAGE 2 ============================ *



LOOP_WHILE:

* ============ STAGES I,II (epilog) + Outer Loop  ======================= *

        NOP             4
        LDW     .D1T1   *+A_SP[0],  A_w0                        ; Restore A_ptr_w in A_w0
        LDW     .D1T1   *+A_SP[2],  A_x                         ; Restore A_ptr_x in A_x
        ZERO    .D1     A_j
        NOP
        MV      .L2     B_fft_jmp,  B_fft_jmp_old
        ADDAH   .D1     A_w0,       A_tw_offset, A_w0
||      MV      .S2     B_h2,       B_h2_old

        MV      .S1     A_h2,       A_h2_old
||      SHRU    .S2     B_stride,   2,          B_h2
||      ADD     .D1     A_x,        -16,        A_x

        SPMASKR
||      MV      .S1     B_h2,       A_h2
||      MV      .L1     A_y,        A_y_old
* ============ STAGE 0 (prolog)+STAGE II (epilog) + Outer Loop  =======================*

        MV      .L1     A_x,        A_y

        SPMASK
||      ZERO    .D2     B_j
||^     SHL     .S1     A_h2_old,   4,          A_2h2           ;[26,1]
||^     STDW    .D1T2   B_x_1o:B_x_0o, *++A_y_old[2]            ;[26,1]
||^     MV      .S2X    A_y_old,    B_y                         ;[26,1]

        SPMASK
||      MPYSU   .M2     6,          B_stride,   B_fft_jmp
||^     STDW    .D1T2   B_xh2_1o:B_xh2_0o, *++A_y_old[A_h2_old] ;[27,1]

        SPMASK
||^     STDW    .D2T1   A_xh2_3o:A_xh2_2o, *++B_y[B_h2_old]     ;[28,1]
||      MV      .L1     A_y_old,    A_y_old_2
||      MV      .L2     B_h2_old,   B_h2_old_2

        SHRU    .S1X    B_fft_jmp,  3,          A_fft_jmp
||      MV      .L2     B_y,        B_y_old
||      MV      .L1     A_h2_old,   A_h2_old_2

        NOP

        MV      .S2     B_fft_jmp_old, B_fft_jmp_old_2

        SPMASK
||      SHRU    .S2     B_fft_jmp,  1,          B_fft_jmp_temp
||^     STDW    .D1T2   B_xl1_1o:B_xl1_0o, *++A_y_old_2[A_h2_old_2];[32,1]

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -