⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 example 3-48.asm

📁 《基于TI DSP的通用算法实现》程序代码
💻 ASM
📖 第 1 页 / 共 3 页
字号:
*   CODESIZE                                                              *  
*                                                                         *  
*        1248 bytes                                                       *  
*                                                                         *  
* ----------------------------------------------------------------------- *
*            Copyright (c) 2003 Texas Instruments, Incorporated.          *
*                           All Rights Reserved.                          *
* ======================================================================= *

                ; Make the entry label visible outside this file.
                .global _DSPF_sp_cfftr2_dit
                
                ; Symbolic register names (.asg substitution symbols).
                ; The A_/B_ prefix matches the register file of the assigned register.
                ;
                ; Roles, as used by the instructions and comments in this file:
                ;   B_x, A_x            load pointers for x[2m] and x[2ia]
                ;   A_w, A_wbase        twiddle load pointer and its saved base
                ;   B_x2mp1:B_x2m       loaded pair x[2m+1]:x[2m]
                ;   A_x2iap1:A_x2ia     loaded pair x[2ia+1]:x[2ia]
                ;   A_si:A_co           loaded twiddle pair si:co
                ;   A_p1,B_p2,A_p3,B_p4 the four MPYSP products
                ;   A_rtemp, B_itemp    p1+p3 and p2-p4
                ;   B_n2, B_hafn2       A6 >> 1 and B_n2 >> 1
                ;   A_8n2, B_8n2        A6 << 2 (pointer increment), one copy per side
                ;   B_lx2mc, B_lx2iac   down-counters for the x[2m] / x[2ia] loads
                ;   A_stcnt, A_nby2     store down-counter and loop counter
                ;
                ; Several registers carry more than one name, so these names are
                ; alternative labels for one register, not independent values:
                ;   A15 = A_p3, A_x2ias, A_x2ms
                ;   B3  = B_p4, B_x2mp1s, B_x2iap1s
                ; A4, A6, B4, A1, B1 and B2 are also accessed under their raw
                ; register names by the entry/setup code before the aliases are used.
                .asg B8, B_x
                .asg A5, A_w
                .asg B7, B_x2mp1
                .asg B6, B_x2m
                .asg A7, A_si
                .asg A6, A_co
                .asg B0, B_lx2mc
                .asg B4, B_8n2
                .asg B13, B_n2
                .asg A13, A_p1
                .asg B11, B_p2
                .asg B3, B_p4
                .asg A15, A_p3
                .asg B2, B_lx2iac
                .asg A3, A_x
                .asg A10, A_8n2
                .asg A12, A_rtemp
                .asg B10, B_itemp
                .asg A1, A_stcnt
                .asg A15, A_x2ias
                .asg B3, B_x2mp1s
                .asg A15, A_x2ms
                .asg B3, B_x2iap1s
                .asg A9, A_x2iap1
                .asg A8, A_x2ia
                .asg B1, B_hafn2
                .asg A2, A_nby2
                .asg A4, A_wbase
                

_DSPF_sp_cfftr2_dit:

       ; Entry: reserve a 16-word frame below B15 and spill A10-A15, B3,
       ; B10-B14, CSR and IRP into it, clearing CSR bit 0 on the way.
       ; A1 is a copy of the frame base so that A-side and B-side stores
       ; can be issued in the same packet.
       ; Frame slots (word index from B15):
       ;   0 A10, 1 A11, 2 CSR, 3 A12, 4 B11, 5 A13, 6 IRP, 7 A14,
       ;   8 B13, 9 A15, 10 B14, 11 B12, 12 B3, 13 B10
       ; NOTE(review): the B to no_int targets the packet that straight-line
       ; execution reaches anyway; presumably it exists only so the saves run
       ; inside branch delay slots and cannot be interrupted - confirm.
       ; NOTE(review): register inputs appear to be A4 = x, B4 = w, A6 = n
       ; (from how they are used below) - confirm against the C prototype.
       ; push all the registers (also CSR, IRP )
       SUBAW  .D2     B15,        16,       B15       ; open 16-word frame
||     B      .S1     no_int                          ; see NOTE(review) above
||     MVC    .S2     CSR,        B1                  ; B1 = CSR (saved to slot 2)
       
       MV     .S1X    B15,        A1                  ; A1 = frame base for A-side stores
||     STW    .D2T1   A10,        *B15[0]             

       STW    .D2T2   B1,         *B15[2]             ; save original CSR
||     STW    .D1T1   A11,        *A1[1]              

       STW    .D2T2   B11,        *B15[4]             
||     STW    .D1T1   A12,        *A1[3]              
||     AND    .L2     B1,         -2,       B1  ; disable interrupts      
||     MVC    .S2     IRP,        B2                  ; B2 = IRP (saved to slot 6)

       STW    .D2T2   B2,         *B15[6]             ; save original IRP
||     STW    .D1T1   A13,        *A1[5]              
||     MVC    .S2     B1,         CSR                 ; write CSR back with bit 0 cleared

       STW    .D2T2   B13,        *B15[8]             
||     STW    .D1T1   A14,        *A1[7]              

no_int:

       STW    .D2T2   B14,        *B15[10]            
||     STW    .D1T1   A15,        *A1[9]              

       STW    .D2T2   B3,         *B15[12]            
||     MVC    .S2     B15,        IRP                 ; keep frame base in IRP; B15 is reused below

       ADDAW  .D1     A4,         A6,       A3        ; init x[2m] ptr 
||     SHR    .S2X    A6,         1,        B_n2      ; init n2
||     MV     .L1X    B4,         A_w                 ; init w ptr
||     STW    .D2T2   B10,        *B15[13]            ; push b10

       ; B15 is overwritten here with n2 - 6 (copied to A_nby2 later on).
       ; NOTE(review): the STW in this packet still addresses the frame, so it
       ; relies on reading B15 before the parallel SUB result lands - confirm.
       MV     .S2X    A3,         B_x                 ; transfer x[2m] ptr
||     SHL    .S1     A6,         2,        A_8n2     ; keep 8n2 for addr incr
||     SUB    .L2     B_n2,       6,        B15       ; for inner loop cntr
||     STW    .D2T2   B12,        *B15[11]            ; push b12

* ====================== PIPED LOOP PROLOG ======================================= *

       ; Prolog: finishes pointer/counter setup and issues the first loads,
       ; multiplies and add/subtracts so the kernel at loop starts with work
       ; already in flight. No stores are issued here; the first STWs are in
       ; the kernel.
       LDDW   .D2     *B_x++,     B_x2mp1:B_x2m       ; load x[2m+1]:x[2m]
||     LDDW   .D1     *A_w++,     A_si:A_co           ; load si:co
||     MV     .S2     B_x,        B5                  ; init x[2m] store ptr

       MV     .L1     A4,         A11                 ; init x[2ia] store ptr
||     SHL    .S2X    A6,         2,        B_8n2     ; copy of 8n2 on b-side
||     MV             B_n2,       B_lx2mc             ; load cntr for x[2m] loads

       ; A_wbase is A4, so the SUB below replaces the x base held in A4;
       ; the three MVs in the same packet copy it to A_x, B14 and A0.
 [B_lx2mc]SUB .L2     B_lx2mc,    1,        B_lx2mc   ; decr x[2m] load cntr
||     MV     .S1     A4,         A_x                 ; init x[2ia] load ptr = x base
||     MV     .S2X    A4,         B14                 ; save base x ptr
||     MV     .D1     A4,         A0                  ; save base x ptr
||     SUB    .L1     A_w,        8,        A_wbase   ; save w base ptr (A_w - 8)

       MV     .D2     B_n2,       B_lx2iac            ; init x[2ia] load cntr
||     MV     .L1X    B_n2,       A_stcnt             ; init store cntr
||     SHR    .S2     B_n2,       1,        B_hafn2   ; init half of n2     

       LDDW   .D2     *B_x++,     B_x2mp1:B_x2m       ; load x[2m+1]:x[2m]

       MPYSP  .M1X    A_co,       B_x2m,    A_p1      ; p1=co*x[2m]
||     MPYSP  .M2X    A_co,       B_x2mp1,  B_p2      ; p2=co*x[2m+1]

 [B_lx2mc]SUB .S2     B_lx2mc,    1,        B_lx2mc   ; decr x[2m] load cntr
||     MV     .L1X    B15,        A_nby2              ; init loop cntr (B15 holds n2 - 6)
       
       MPYSP  .M1X    A_si,       B_x2mp1,  A_p3      ; p3=si*x[2m+1]
||     MPYSP  .M2X    A_si,       B_x2m,    B_p4      ; p4=si*x[2m]

       LDDW   .D2     *B_x++,     B_x2mp1:B_x2m       ; load x[2m+1]:x[2m]

       MPYSP  .M1X    A_co,       B_x2m,    A_p1      ; p1=co*x[2m]
||     MPYSP  .M2X    A_co,       B_x2mp1,  B_p2      ; p2=co*x[2m+1]

  [B_lx2mc]SUB.S2     B_lx2mc,    1,        B_lx2mc   ; decr x[2m] load cntr

       LDDW   .D1     *A_x++,     A_x2iap1:A_x2ia     ; load x[2ia+1]:x[2ia]
||     MPYSP  .M1X    A_si,       B_x2mp1,  A_p3      ; p3=si*x[2m+1]
||     MPYSP  .M2X    A_si,       B_x2m,    B_p4      ; p4=si*x[2m]
||     ADDSP  .L1     A_p1,       A_p3,     A_rtemp   ; rtemp=p1+p3
||     SUBSP  .L2     B_p2,       B_p4,     B_itemp   ; itemp=p2-p4

       LDDW   .D2     *B_x++,     B_x2mp1:B_x2m       ; load x[2m+1]:x[2m]
||[B_lx2iac]SUB.S2     B_lx2iac,  1,        B_lx2iac  ; decr x[2ia] load cntr

       MPYSP  .M1X    A_co,       B_x2m,    A_p1      ; p1=co*x[2m]
||     MPYSP  .M2X    A_co,       B_x2mp1,  B_p2      ; p2=co*x[2m+1]

  [B_lx2mc]SUB.S2     B_lx2mc,    1,        B_lx2mc   ; decr x[2m] load cntr

       LDDW   .D1     *A_x++,     A_x2iap1:A_x2ia     ; load x[2ia+1]:x[2ia]
||     MPYSP  .M1X    A_si,       B_x2mp1,  A_p3      ; p3=si*x[2m+1]
||     MPYSP  .M2X    A_si,       B_x2m,    B_p4      ; p4=si*x[2m]
||     ADDSP  .L1     A_p1,       A_p3,     A_rtemp   ; rtemp=p1+p3
||     SUBSP  .L2     B_p2,       B_p4,     B_itemp   ; itemp=p2-p4

       LDDW   .D2     *B_x++,     B_x2mp1:B_x2m       ; load x[2m+1]:x[2m]
||[B_lx2iac]SUB.S2    B_lx2iac,   1,        B_lx2iac  ; decr x[2ia] load cntr
||     ADDSP  .L1     A_x2ia,     A_rtemp,  A_x2ias   ; x[2ia]=x[2ia]+rtemp
||     SUBSP  .L2X    A_x2iap1,   B_itemp,  B_x2mp1s  ; x[2m+1]=x[2ia+1]-itemp

       MPYSP  .M1X    A_co,       B_x2m,    A_p1      ; p1=co*x[2m]
||     MPYSP  .M2X    A_co,       B_x2mp1,  B_p2      ; p2=co*x[2m+1]
||     B      .S2     loop                            ; first (unconditional) branch to the kernel

  [B_lx2mc]SUB.S2     B_lx2mc,    1,        B_lx2mc   ; decr x[2m] load cntr
||     SUBSP  .L1     A_x2ia,     A_rtemp,  A_x2ms    ; x[2m]=x[2ia]-rtemp
||     ADDSP  .L2X    A_x2iap1,   B_itemp,  B_x2iap1s ; x[2ia+1]=x[2ia+1]+itemp

* ================== PIPED LOOP KERNEL ======================================== *
loop:

       ; Kernel: four execute packets per pass. Each pass issues one x[2ia]
       ; load, one x[2m] load, four MPYSPs, the ADDSP/SUBSP pairs and four
       ; STWs (x[2ia], x[2ia+1] through A11; x[2m], x[2m+1] through B5).
       ; Three down-counters steer the pointers:
       ;   B_lx2mc  - x[2m] loads;  at zero B_x += 8n2 and a new si:co is loaded
       ;   B_lx2iac - x[2ia] loads; at zero A_x += 8n2
       ;   A_stcnt  - stores;       at zero B5 and A11 += 8n2
       ; Each counter is reloaded from B_n2 with an MPY by 1 when it hits zero.
       ; A_nby2 counts the remaining passes and predicates the branch to loop.
       LDDW   .D1     *A_x++,     A_x2iap1:A_x2ia     ; load x[2ia+1]:x[2ia]
||     MPYSP  .M1X    A_si,       B_x2mp1,  A_p3      ; p3=si*x[2m+1]
||     MPYSP  .M2X    A_si,       B_x2m,    B_p4      ; p4=si*x[2m]
||[!B_lx2mc]ADD.D2    B_x,        B_8n2,    B_x       ; incr x[2m] load ptr if required
||     ADDSP  .L1     A_p1,       A_p3,     A_rtemp   ; rtemp=p1+p3
||     SUBSP  .L2     B_p2,       B_p4,     B_itemp   ; itemp=p2-p4
||[!A_stcnt]ADD.S2    B5,         B_8n2,    B5        ; incr x[2m] store ptr if required
||[!A_stcnt]ADD.S1    A11,        A_8n2,    A11       ; incr x[2ia] store ptr if required

       LDDW   .D2     *B_x++,     B_x2mp1:B_x2m       ; load x[2m+1]:x[2m]
||[!B_lx2mc]LDDW.D1   *A_w++,     A_si:A_co           ; load next si:co when x[2m] cntr is zero
||[!B_lx2mc]MPY .M2   1,          B_n2,     B_lx2mc   ; reset x[2m] load cntr from B_n2
||[B_lx2iac]SUB .S2   B_lx2iac,   1,        B_lx2iac  ; decr x[2ia] load cntr
||     ADDSP  .L1     A_x2ia,     A_rtemp,  A_x2ias   ; x[2ia]=x[2ia]+rtemp
||     SUBSP  .L2X    A_x2iap1,   B_itemp,  B_x2mp1s  ; x[2m+1]=x[2ia+1]-itemp
||[A_nby2]SUB .S1     A_nby2,     1,        A_nby2    ; decr loop cntr
||[!A_stcnt]MPY.M1X   1,          B_n2,     A_stcnt   ; reset store cntr from B_n2

       MPYSP  .M1X    A_co,       B_x2m,    A_p1      ; p1=co*x[2m]
||     MPYSP  .M2X    A_co,       B_x2mp1,  B_p2      ; p2=co*x[2m+1]
||[!B_lx2iac]ADD.S1   A_x,        A_8n2,    A_x       ; incr x[2ia] load ptr if required
||[A_nby2]B   .S2     loop                            ; Branch loop
||     STW    .D1T1   A_x2ias,    *A11++              ; store x[2ia]
||     STW    .D2T2   B_x2mp1s,   *B5[1]              ; store x[2m+1] (B5 not advanced here)

  [B_lx2mc]SUB.S2     B_lx2mc,    1,        B_lx2mc   ; decr x[2m] load cntr
||[!B_lx2iac]MPY.M2   1,          B_n2,     B_lx2iac  ; reset x[2ia] load cntr from B_n2
||     SUBSP  .L1     A_x2ia,     A_rtemp,  A_x2ms    ; x[2m]=x[2ia]-rtemp
||     ADDSP  .L2X    A_x2iap1,   B_itemp,  B_x2iap1s ; x[2ia+1]=x[2ia+1]+itemp
||     STW    .D1     B_x2iap1s,  *A11++              ; store x[2ia+1]
||     STW    .D2     A_x2ms,     *B5++[2]            ; store x[2m], then advance B5 by 2 words
||[A_stcnt]SUB.S1     A_stcnt,    1,        A_stcnt   ; decr store cntr

* ======================= END OF PIPED LOOP KERNEL ========================== *
       
       LDDW   .D1     *A_x++,     A_x2iap1:A_x2ia     ; (e) load x[2ia+1]:x[2ia]
||     MPYSP  .M1X    A_si,       B_x2mp1,  A_p3      ; (e) p3=si*x[2m+1]
||     MPYSP  .M2X    A_si,       B_x2m,    B_p4      ; (e) p4=si*x[2m]
||[!B_lx2mc]ADDAW.D2  B14,        B_n2,     B_x       ; (p) init B_x for outer loop
||     ADDSP  .L1     A_p1,       A_p3,     A_rtemp   ; (e) rtemp=p1+p3
||     SUBSP  .L2     B_p2,       B_p4,     B_itemp   ; (e) itemp=p2-p4
||[!A_stcnt]ADD.S2    B5,         B_8n2,    B5        ; (e) incr store ptr if required
||[!A_stcnt]ADD.S1    A11,        A_8n2,    A11       ; (e) incr store ptr if required

  ; loads are predicated with B_hafn2 so that the last prolog does not
  ; perform invalid loads 
  

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -