⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fast hadamard transform.txt

📁 c6000的标准函数库
💻 TXT
📖 第 1 页 / 共 2 页
字号:
        sub2.s1 a7,a2,a9        ; d4(6)  and d4(7)    
||      sub2.s2 b7,b2,b9        ; d4(22) and d4(23)   
||      mpyhu.m1 a0,b13,a1      ; d4(1) 
||      and.l1 a0,a15,a0        ; d4(0)  
||      mpyhu.m2 b0,b13,b1      ; d4(17) 
||      and.l2 b0,a15,b0        ; d4(16) 
         
        add2.s1 a7,a2,a2        ; d4(4)  and d4(5) 
||      add2.s2 b7,b2,b2        ; d4(20) and d4(21) 
||      mpyhu.m1 a6,b13,a7      ; d4(3)           
||      and.l1 a6,a15,a6        ; d4(2)            
||      mpyhu.m2 b6,b13,b7      ; d4(19)           
||      and.l2 b6,a15,b6        ; d4(18)           

        ; Start of 5th stage

        sub2.s1 a4,a5,a12       ; d4(10) and d4(11)
||      sub2.s2 b4,b5,b12       ; d4(26) and d4(27) 
||      add.l1 a0,a1,a0         ; d5(0)            
||      sub.d1 a0,a1,a1         ; d5(1)          
||      add.l2 b0,b1,b0         ; d5(16)        
||      sub.d2 b0,b1,b1         ; d5(17)      
||      mpyhu.m1 a2,b13,a10     ; d4(5)  
||      mpyhu.m2 b2,b13,b10     ; d4(21)         


        add2.s1 a4,a5,a4        ; d4(8)  and d4(9)  
||      add2.s2 b4,b5,b4        ; d4(24) and d4(25)
||      add.l1 a6,a7,a6         ; d5(2)          
||      sub.d1 a6,a7,a7         ; d5(3)            
||      add.l2 b6,b7,b6         ; d5(18)           
||      sub.d2 b6,b7,b7         ; d5(19)          
||      mpylhu.m1 a2,b13,a2     ; d4(4)          
||      mpylhu.m2 b2,b13,b2     ; d4(20)            

        sub2.s1 a3,a8,a5        ; d4(14) and d4(15) 
||      sub2.s2 b3,b8,b5        ; d4(30) and d4(31)  
||      mpyhu.m1 a9,b13,a11     ; d4(6)             
||      and.l1 a9,a15,a9        ; d4(7)             
||      mpyhu.m2 b9,b13,b11     ; d4(22)          
||      and.l2 b9,a15,b9        ; d4(23)             


        add2.s1 a3,a8,a3        ; d4(12) and d4(13) 
||      add2.s2 b3,b8,b3        ; d4(28) and d4(29)  
||      and.l1  a0,a15,a0       ; clear upper half of register
||      and.l2  b0,a15,b0       ; clear upper half of register

        add.l1 a2,a10,a2        ; d5(4)            
||      sub.d1 a2,a10,a10       ; d5(5)            
||      add.l2 b2,b10,b2        ; d5(20)            
||      sub.d2 b2,b10,b10       ; d5(21)           
||      mpyhu.m1 a4,b13,a8      ; d4(9)              
||      mpyhu.m2 b4,b13,b8      ; d4(25)            
||      and.s1  a6,a15,a6       ; clear upper half of register  
||      and.s2  b6,a15,b6       ; clear upper half of register


        shl.s1 a1,16,a1         ; d5(1)  in upper half
||      shl.s2 b1,16,b1         ; d5(17) in upper half
||      add.l1 a9,a11,a9        ; d5(6)            
||      sub.d1 a9,a11,a11       ; d5(7)            
||      add.l2 b9,b11,b9        ; d5(22)           
||      sub.d2 b9,b11,b11       ; d5(23)            
||      mpylhu.m1 a4,b13,a4     ; d4(8)            
||      mpylhu.m2 b4,b13,b4     ; d4(24)          

        shl.s1 a7,16,a7         ; d5(3)  in upper half
||      shl.s2 b7,16,b7         ; d5(19) in upper half
||      add.d1 a0,a1,a0         ; d5(0)  and d5(1)   
||      add.d2 b0,b1,b0         ; d5(16) and d5(17)     
||      mpyhu.m1 a12,b13,a1     ; d4(10)             
||      and.l1 a12,a15,a12      ; d4(11)            
||      mpyhu.m2 b12,b13,b1     ; d4(26)            
||      and.l2 b12,a15,b12      ; d4(27)            


        and.l1 a6,a15,a6        ; clear upper half of register
||      and.l2 b6,a15,b6        ; clear upper half of register
||      mpylhu.m1 a2,b13,a2     ; clear upper half of register
||      mpylhu.m2 b2,b13,b2     ; clear upper half of register

        shl.s1 a10,16,a10       ; d5(5)  in upper half
||      shl.s2 b10,16,b10       ; d5(21) in upper half
||      add.d1 a6,a7,a6         ; d5(2)  and d5(3) 
||      add.d2 b6,b7,b6         ; d5(18) and d5(19)  
||      mpyhu.m1 a3,b13,a7      ; d4(12) 
||      and.l1 a3,a15,a3        ; d4(13) 
||      mpyhu.m2 b3,b13,b7      ; d4(28) 
||      and.l2 b3,a15,b3        ; d4(29) 
 
        shl.s1 a11,16,a11       ; d5(7)  in upper half
||      shl.s2 b11,16,b11       ; d5(23) in upper half
||      add.d1 a2,a10,a2        ; d5(4)  and d5(5)    
||      add.d2 b2,b10,b2        ; d5(20) and d5(21)  
||      mpyhu.m1 a5,b13,a10     ; d4(14)              
||      and.l1 a5,a15,a5        ; d4(15)              
||      mpyhu.m2 b5,b13,b10     ; d4(30)             
||      and.l2 b5,a15,b5        ; d4(31)             

        ; End of 4th stage

        stw.d1 a0,*a14++        ; 
||      add.l1 a4,a8,a4         ; d5(8)
||      sub.s1 a4,a8,a8         ; d5(9)
||      add.l2 b4,b8,b4         ; d5(24)
||      sub.s2 b4,b8,b8         ; d5(25)

        shl.s1 a8,16,a8         ; d5(9)  in upper half
||      shl.s2 b8,16,b8         ; d5(25) in upper half
||      add.l1 a12,a1,a12       ; d5(10)             
||      sub.d1 a12,a1,a1        ; d5(11)             
||      add.l2 b12,b1,b12       ; d5(26)             
||      sub.d2 b12,b1,b1        ; d5(27)             

        shl.s1 a1,16,a1         ; d5(11) in upper half
||      shl.s2 b1,16,b1         ; d5(27) in upper half
||      add.l1 a3,a7,a3         ; d5(12)            
||      sub.d1 a3,a7,a7         ; d5(13)            
||      add.l2 b3,b7,b3         ; d5(28)            
||      sub.d2 b3,b7,b7         ; d5(29)            
||      mpylhu.m1 a9,b13,a9     ; clear upper half of register
||      mpylhu.m2 b9,b13,b9     ; clear upper half of register
 
        shl.s1 a7,16,a7         ; d5(13) in upper half
||      shl.s2 b7,16,b7         ; d5(29) in upper half
||      add.l1 a5,a10,a5        ; d5(14)             
||      sub.d1 a5,a10,a10       ; d5(15)             
||      add.l2 b5,b10,b5        ; d5(30)             
||      sub.d2 b5,b10,b10       ; d5(31)             
||      mpyhl.m2 b13,b13,b0     ; extract the counter in b0
  
        stw.d1 a6,*a14++        ; 
||      stw.d2 b0,*b14++        ;         
||      shl.s1 a10,16,a10       ; d5(15) in upper half
||      shl.s2 b10,16,b10       ; d5(31) in upper half
||      add.l1 a9,a11,a9        ; d5(6)  and d5(7)  
||      add.l2 b9,b11,b9        ; d5(22) and d5(23) 
 
        and.l1 a4,a15,a4        ; clear upper half of register
||      and.l2 b4,a15,b4        ; clear upper half of register
||      mpylhu.m1 a12,b13,a12   ; clear upper half of register
||      mpylhu.m2 b12,b13,b12   ; clear upper half of register

        and.l1 a3,a15,a3        ; clear upper half of register
||      and.l2 b3,a15,b3        ; clear upper half of register
||      mpylhu.m1 a5,b13,a5     ; clear upper half of register
||      mpylhu.m2 b5,b13,b5     ; clear upper half of register

        stw.d1 a2,*a14++        ; 
||      stw.d2 b6,*b14++        ; 
||      add.l1 a4,a8,a4         ; d5(8)  and d5(9)   
||      add.l2 b4,b8,b4         ; d5(24) and d5(25)  
||      sub.s2 b0,1,b0          ; decrement counter
 
        stw.d1 a9,*a14++        ; 
||      stw.d2 b2,*b14++        ; 
||      add.l1 a3,a7,a3         ; d5(12) and d5(13)  
||      add.l2 b3,b7,b3         ; d5(28) and d5(29) 
||      add.s1 a12,a1,a12       ; d5(10) and d5(11) 
||      add.s2 b12,b1,b12       ; d5(26) and d5(27) 
||      mpylh.m2 b0,b13,b13     ; move counter to b13(low)
   
        stw.d1 a4,*a14++        ; 
||      stw.d2 b9,*b14++        ; 
||      add.l1 a5,a10,a5        ; d5(14) and d5(15)  
||      add.l2 b5,b10,b5        ; d5(30) and d5(31)  
    
        stw.d1 a12,*a14++       ; 
||      stw.d2 b4,*b14++        ; 
||      mvklh.s2 1,b13          ; b13(high) = 1 ( for multiplications)
||[b0]  b.s1   H5_loop          ; branch back
   
        stw.d1 a3,*a14++        ; 
||      stw.d2 b12,*b14++       ;         
 
        stw.d1 a5,*a14++        ; 
||      stw.d2 b3,*b14++        ;         
||[!b0] mv.l2 a13,b13           ; 

        stw.d2 b5,*b14++        ;       

        ; End of 5th stage
        ; Adjust pointers                                       
                                                
   [b0] add.l1 a14,a13,a14      ; if looping back, a14 = &d(32)
|| [b0] add.l2 b14,a13,b14      ;
||[!b0] sub.s1 a14,a13,a14      ; if not looping,  a14 = &d(32) 
||[!b0] sub.s2 b14,b13,b14      ; if not looping,  b14 = &d(48)

  [!b0] sub.s1 a14,a13,a14      ; if not looping,  a14 = &d(16)
||[!b0] sub.s2 b14,b13,b14      ; if not looping,  b14 = &d(32)

  [!b0] sub.s1 a14,a13,a14      ; if not looping,  a14 = &d(0)

        ; Branch to H5_loop occurs here 

        ; H6 loop (last stage)
        ;
        ; Prologue of the last stage. It initialises the loop counter, derives
        ; four load pointers and four store pointers from a14/b14, and primes
        ; the software pipeline (two rounds of loads plus the first add2/sub2
        ; pair) before falling through into H6loop.
        ;
        ; On entry (per the pointer-adjust comments at the end of the 5th
        ; stage): a14 = &d[0], b14 = &d[32].
        ;
        ; Pointer roles after set-up:
        ;   a14 / b14 : load pointers,  upper even / upper odd  (&d[0],  &d[2])
        ;   a13 / b13 : load pointers,  lower even / lower odd  (&d[32], &d[34])
        ;   a12 / b12 : store pointers, upper even / upper odd  (&d[0],  &d[2])
        ;   a11 / b11 : store pointers, lower even / lower odd  (&d[32], &d[34])
        ; Every pointer advances by 2 words per access (++[2]), so the A-side
        ; and B-side pointers interleave over alternate words.
H6:

        ; All four instructions are one execute packet: the mv and the second
        ; add read b14 in the same packet in which the first add rewrites it,
        ; so they see the entry value of b14 (&d[32]).
        mvk.s2 8,b0             ; initialize counter
||      mv     b14,a13          ; set up load pointers: 
||      add    a14,4,b14        ; a14 = &d[0]  ; b14 = &d[2]  (upper even and odd)
||      add    b14,4,b13        ; a13 = &d[32] ; b13 = &d[34] (lower even and odd)

        ; First round of loads. The mv's share the packet with the
        ; post-incrementing loads, so the store pointers are set from the
        ; load pointers as they stood before this packet.
        ldw.d1 *a14++[2],a4     ; upper even load
||      ldw.d2 *b14++[2],b4     ; upper odd  load
||      mv     a14,a12          ; set up store pointers
||      mv     b14,b12          ; a12 = &d[0]  ; b12 = &d[2]

        ldw.d1 *a13++[2],a3     ; lower even load
||      ldw.d2 *b13++[2],b3     ; lower odd load
||      mv     a13,a11          ; set up store pointers
||      mv     b13,b11          ; a11 = &d[32] ; b11 = &d[34]

        nop 2                   ; wait for the first loads to land

        ; Second round of loads, issued before the first results are consumed
        ; so that loads and arithmetic overlap.
        ldw.d1 *a14++[2],a4     ; upper even load                           
||      ldw.d2 *b14++[2],b4     ; upper odd  load

        ldw.d1 *a13++[2],a3     ; lower even load                           
||      ldw.d2 *b13++[2],b3     ; lower odd load

        ; First butterfly: a2/b2 = upper + lower, a1/b1 = upper - lower.
        ; The results are stored by the first pass through H6loop.
        add2.s1 a4,a3,a2        ; upper/lower even -> upper even
||      add2.s2 b4,b3,b2        ; upper/lower odd  -> upper odd

        sub2.s1 a4,a3,a1        ; upper/lower even -> lower even
||      sub2.s2 b4,b3,b1        ; upper/lower odd  -> lower odd
||[b0]  sub.l2 b0,1,b0          ; decrement counter while it is non-zero

        ; The branch is issued here; the packets that follow (including the
        ; first ones of H6loop) execute in its delay slots.
        ldw.d1 *a14++[2],a4     ; upper even load                           
||      ldw.d2 *b14++[2],b4     ; upper odd  load
||[b0]  b      H6loop       

        ldw.d1 *a13++[2],a3     ; lower even load                           
||      ldw.d2 *b13++[2],b3     ; lower odd load

        ; Kernel of the last stage. Each pass is four execute packets:
        ;   1. add2 on the loaded upper/lower words, and store of the previous
        ;      sums (a2/b2) through the upper store pointers a12/b12
        ;   2. sub2 on the same words, store of the previous differences
        ;      (a1/b1) through the lower store pointers a11/b11, and counter
        ;      decrement
        ;   3. next upper loads, plus the conditional branch back to H6loop
        ;   4. next lower loads
        ; The stw in packets 1 and 2 shares a packet with the add2/sub2 that
        ; rewrites its source register, so it stores the value produced before
        ; this packet (the previous pass, or the H6 prologue on the first pass).
        ; NOTE(review): the schedule relies on the C6000 load latency and branch
        ; delay slots lining up with this 4-packet body -- confirm against the
        ; ISA reference before reordering anything.
H6loop:
        add2.s1 a4,a3,a2        ; upper/lower even -> upper even
||      add2.s2 b4,b3,b2        ; upper/lower odd  -> upper odd
||      stw a2,*a12++[2]        ; store previous upper-even result, advance 2 words
||      stw b2,*b12++[2]        ; store previous upper-odd result, advance 2 words

        sub2.s1 a4,a3,a1        ; upper/lower even -> lower even
||      sub2.s2 b4,b3,b1        ; upper/lower odd  -> lower odd
||[b0]  sub.l2 b0,1,b0          ; decrement counter while it is non-zero
||      stw a1,*a11++[2]        ; store previous lower-even result, advance 2 words
||      stw b1,*b11++[2]        ; store previous lower-odd result, advance 2 words

        ldw.d1 *a14++[2],a4     ; upper even load                           
||      ldw.d2 *b14++[2],b4     ; upper odd  load
||[b0]  b      H6loop           ; loop again while the counter is non-zero

        ldw.d1 *a13++[2],a3     ; lower even load                           
||      ldw.d2 *b13++[2],b3     ; lower odd load

        ; Branch to H6loop occurs here                  
       
        ; Restore context       
        ;
        ; Epilogue: reloads b3 and the register pairs a10-a14 / b10-b14 from the
        ; stack addressed by b15, then branches to the address in b3.
        ; b15 walks the B-side slots and a8 (= b15 + 4) walks the A-side slots;
        ; both step by 2 words per load, so A and B registers occupy alternating
        ; words. b3 is read from word offset 10 relative to the entry value of
        ; b15, without modifying b15.
        ; NOTE(review): the matching save sequence is not visible in this part
        ; of the file -- confirm the stack layout against it.

        ldw.d2  *+b15[10],b3    ; pop b3 
        add.l1x b15,4,a8        ; copy stack pointer
        ldw.d2  *b15++[2],b14   ; pop b14 
||      ldw.d1  *a8++[2],a14    ; pop a14 
        ldw.d2  *b15++[2],b13   ; pop b13 
||      ldw.d1  *a8++[2],a13    ; pop a13 
        ldw.d2  *b15++[2],b12   ; pop b12 
||      ldw.d1  *a8++[2],a12    ; pop a12 
        ; The branch is issued alongside the b11/a11 loads; the final pair of
        ; loads and the nop 4 below run before the branch takes effect.
        ldw.d2  *b15++[2],b11   ; pop b11 
||      ldw.d1  *a8++[2],a11    ; pop a11 
||      b.s2     b3             ; branch to the address reloaded into b3
        ldw.d2  *b15++[2],b10   ; pop b10 
||      ldw.d1  *a8,a10         ; pop a10 
        nop     4               ; fill the remaining slots before the branch is taken

        .end

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -