📄 example 3-34.asm
字号:
|| SHR .S2 B4, 15, B4 ;t6b >>= 15
|| SMPYH .M1X B6, A2, A11 ;t1a=hi(c)*hi(xt)
|| SMPYH .M2 B6, B10, B3 ;t1b=hi(c)*hi(yt)
|| MV .S1 A6, A4 ;split long life of store_re (A4 = A6)
|| ADD .D2 B12, B3, B9 ;yt3 = yt1+yt2
;-----------------------------------------------------------------------
; Tail of the k-loop body. The loop starts above the visible region, so
; the execute packet above is only partially shown. A line starting with
; "||" issues in the same execute packet as the line before it.
; Naming used in the comments: c/s are the two twiddle words, xt/yt the
; butterfly differences, t1..t11 partial products and sums, "a" = A-side,
; "b" = B-side.
; NOTE(review): this code is hand-scheduled. Several instructions read a
; register while a load or multiply into that same register has already
; been issued (e.g. A2 is popped as k below but is still read as xt).
; Do not reorder, merge or split packets without re-checking latencies.
;-----------------------------------------------------------------------
SUB .L1 A0, A9, A2 ;xt = xt2-xt1
|| SUB .L2 B12, B3, B10 ;yt = yt2-yt1
|| SHR .S1 A14, 15, A14 ;t6a >>=15
|| SMPYH .M1X A8, B10, A9 ;t2a=hi(s)*hi(yt)
|| SMPYH .M2X B11, A2, B12 ;t2b=hi(s)*hi(xt)
ADD .L1 A0, A9, A0 ;xt3 = xt1+xt2
|| MV .L2X A8, B11 ;copy s to B-side
|| SHR .S1 A15, 15, A15 ;t7a >>= 15
|| SHR .S2 B7, 15, B7 ;t7b >>= 15
ADD .L1 A14, A15, A13 ;t8a = t6a+t7a
|| ADD .L2 B4, B7, B14 ;t8b = t6b+t7b
|| ADD .S1 A12, A13, A12 ;t5a = t3a+t4a
|| ADD .D2 B13, B14, B13 ;t5b = t3b+t4b
|| MPYHSLU .M1X B6, A2, A12 ;t3a = hi(c)*lo(xt)
|| MPYHSLU .M2 B6, B10, B13 ;t3b = hi(c)*lo(yt)
|| STW .D1 A0, *--A6[A7] ;store x[2i]
|| ADD .S2X A4, 4, B7 ;initialize store_im (B7 = A4 + 4)
ADD .S1 A11, A9, A0 ;t9a = t1a+t2a
|| SUB .S2 B3, B12, B9 ;t9b = t1b-t2b
|| ADD .L1 A12, A13, A14 ;t10a = t5a+t8a
|| SUB .L2 B13, B14, B14 ;t10b = t5b-t8b
|| MPYLUHS .M1X B6, A2, A13 ;t4a = lo(c)*hi(xt)
|| MPYLUHS .M2 B6, B10, B14 ;t4b = lo(c)*hi(yt)
|| STW .D1 B9, *+A6[1] ;store x[2i+1]
|| LDW .D2 *--B15[1], A2 ;**pop k from stack into A2 (A2 is still read as xt by the multiplies below - NOTE(review): relies on load latency, confirm)
ADD .D1 A0, A14, A15 ;t11a = t9a+t10a
|| ADD .D2 B9, B14, B12 ;t11b = t9b+t10b
|| SHR .S1 A12, 15, A12 ;t3a >>= 15
|| SHR .S2 B13, 15, B13 ;t3b >>= 15
|| MPYHSLU .M1X A8, B10, A14 ;t6a = hi(s)*lo(yt)
|| MPYHSLU .M2X B11, A2, B4 ;t6b = hi(s)*lo(xt)
SHR .S1 A13, 15, A13 ;t4a >>= 15
|| SHR .S2 B14, 15, B14 ;t4b >>= 15
|| MPYLUHS .M1X A8, B10, A15 ;t7a = lo(s)*hi(yt)
|| MPYLUHS .M2X B11, A2, B7 ;t7b = lo(s)*hi(xt)
|| STW .D1 A15, *++A4[A7] ;store x[2(i+n2)]
|| STW .D2 B12, *++B7[B5] ;store x[2(i+n2)+1]
SHR .S2 B4, 15, B4 ;t6b >>= 15
|| SMPYH .M1X B6, A2, A11 ;t1a=hi(c)*hi(xt)
|| SMPYH .M2 B6, B10, B3 ;t1b=hi(c)*hi(yt)
|| MV .D1 A6, A4 ;split long life of store_re (A4 = A6)
|| SHL .S1 A5, 1, A5 ;A5 <<= 1 (original comment "yt3 = yt1+yt2" did not match this instruction; role of A5 is not visible in this section)
|| LDW .D2 *--B15[1], A13 ;**pop ict from stack
SHR .S1 A14, 15, A14 ;t6a >>=15
|| SMPYH .M1X A8, B10, A9 ;t2a=hi(s)*hi(yt)
|| SMPYH .M2X B11, A2, B12 ;t2b=hi(s)*hi(xt)
SHR .S1 A15, 15, A15 ;t7a >>= 15
|| SHR .S2 B7, 15, B7 ;t7b >>= 15
||[A2] SUB .D1 A2, 1, A2 ;k-- (predicated on A2, so skipped when k is already 0)
|| LDW .D2 *--B15[1], A6 ;pop &w from stack
ADD .D1 A14, A15, A13 ;t8a = t6a+t7a
|| ADD .D2 B4, B7, B14 ;t8b = t6b+t7b
|| ADD .L1 A12, A13, A12 ;t5a = t3a+t4a
|| ADD .L2 B13, B14, B13 ;t5b = t3b+t4b
|| ADD .S2X A4, 4, B7 ;initialize store_im (B7 = A4 + 4)
||[A2] B .S1 K_LOOP ;branch to K_LOOP while k (A2) is non-zero
; The next five execute packets follow the conditional branch above.
; NOTE(review): on C6000 these would be the branch delay slots, i.e. they
; run on both the taken and the fall-through path - confirm against the
; CPU reference before moving anything here.
ADD .S1 A11, A9, A0 ;t9a = t1a+t2a
|| SUB .S2 B3, B12, B9 ;t9b = t1b-t2b
|| ADD .D1 A12, A13, A14 ;t10a = t5a+t8a
|| SUB .D2 B13, B14, B14 ;t10b = t5b-t8b
ADD .S1 A0, A14, A15 ;t11a = t9a+t10a
|| ADD .S2 B9, B14, B12 ;t11b = t9b+t10b
STW .D1 A15, *++A4[A7] ;store x[2(i+n2)]
|| STW .D2 B12, *++B7[B5] ;store x[2(i+n2)+1]
LDW .D2 *--B15[1], A4 ;**pop &x from stack
LDW .D2 *--B15[1], A14 ;**pop n2 from stack
END_K_LOOP:
;---- LAST ITERATION OF k loop--------
; Sets up and primes the I_LAST loop: derives the loop counter, points the
; load and store pointers at x[0] (real) and x[1] (imaginary), and issues
; the first loads so that data is already in flight when I_LAST is entered.
; A-side handles the real words, B-side the imaginary words.
NOP 2 ;need to wait for &x (the LDW into A4 issued just before this label must complete before A4 is read)
SUB .S2X A13, 2, B1 ;loop counter B1 = A13 - 2; the 2 compensates for the prolog (A13 was popped as "ict" earlier)
MV .S1 A4, A3 ;load_re = &x[0]
|| ADD .S2X A4, 4, B8 ;load_im = &x[1]
MV .S1 A4, A4 ;store_re = &x[0] (source and destination are both A4, so the value is unchanged)
|| ADD .S2X A4, 4, B7 ;store_im = &x[1]
; Prolog: the loads below have the same shape as the kernel's loads, with
; the counter update and branch in their own packets as in the kernel.
LDW .D1 *A3++[2], A9 ;load xt1
|| LDW .D2 *B8++[2], B3 ;load yt1
LDW .D1 *A3++[2], A0 ;load xt2
|| LDW .D2 *B8++[2], B12 ;load yt2
LDW .D1 *A3++[2], A9 ;load xt1 (next butterfly)
|| LDW .D2 *B8++[2], B3 ;load yt1 (next butterfly)
[B1] SUB .S2 B1, 1, B1 ;decrement inner loop counter (only while B1 is non-zero)
[B1] B .S2 I_LAST ;branch to I_LAST while B1 is non-zero
LDW .D1 *A3++[2], A0 ;load xt2 (next butterfly)
|| LDW .D2 *B8++[2], B12 ;load yt2 (next butterfly)
;----I_LAST KERNEL STARTS HERE--------------------------------
I_LAST:
; Kernel of the final pass: four execute packets per butterfly. Each pass
; forms the sum and difference of two complex points (no multiplies),
; stores them, and issues the loads for a later butterfly.
; Registers: A9/B3 = xt1/yt1, A0/B12 = xt2/yt2, A1/B9 = sums, A2/B10 =
; differences, A3/B8 = load pointers, A4/B7 = store pointers, B1 = counter.
ADD .L1 A9, A0, A1 ;xt3 = xt1+xt2
|| ADD .L2 B3, B12, B9 ;yt3 = yt1+yt2
|| SUB .S1 A9, A0, A2 ;xt = xt1-xt2
|| SUB .S2 B3, B12, B10 ;yt = yt1-yt2
|| LDW .D1 *A3++[2], A9 ;load xt1
|| LDW .D2 *B8++[2], B3 ;load yt1
[B1] SUB .S2 B1, 1, B1 ;decrement inner loop counter (only while B1 is non-zero)
|| STW .D1 A1, *A4++[2] ;store x[2i]
|| STW .D2 B9, *B7++[2] ;store x[2i+1]
[B1] B .S2 I_LAST ;branch to I_LAST while B1 is non-zero
|| STW .D1 A2, *A4++[2] ;store x[2(i+n2)]
|| STW .D2 B10, *B7++[2] ;store x[2(i+n2)+1]
LDW .D1 *A3++[2], A0 ;load xt2
|| LDW .D2 *B8++[2], B12 ;load yt2
;----I_LAST KERNEL ENDS HERE--------------------------------
; Epilog: two more butterflies are computed and stored with no further
; loads or branches, draining the data that the kernel already requested.
ADD .S1 A9, A0, A1 ;xt3 = xt1+xt2
|| ADD .S2 B3, B12, B9 ;yt3 = yt1+yt2
|| SUB .D1 A9, A0, A2 ;xt = xt1-xt2
|| SUB .D2 B3, B12, B10 ;yt = yt1-yt2
STW .D1 A1, *A4++[2] ;store x[2i]
|| STW .D2 B9, *B7++[2] ;store x[2i+1]
; NOTE(review): unlike the kernel and the last epilog stage, the next two
; stores are in separate packets. That keeps four packets between the two
; epilog ADD/SUB groups, the same spacing as in the kernel - presumably so
; the final loads have completed. Do not merge them without re-checking.
STW .D1 A2, *A4++[2] ;store x[2(i+n2)]
STW .D2 B10, *B7++[2] ;store x[2(i+n2)+1]
ADD .L1 A9, A0, A1 ;xt3 = xt1+xt2
|| ADD .L2 B3, B12, B9 ;yt3 = yt1+yt2
|| SUB .S1 A9, A0, A2 ;xt = xt1-xt2
|| SUB .S2 B3, B12, B10 ;yt = yt1-yt2
STW .D1 A1, *A4++[2] ;store x[2i]
|| STW .D2 B9, *B7++[2] ;store x[2i+1]
STW .D1 A2, *A4++[2] ;store x[2(i+n2)]
|| STW .D2 B10, *B7++[2] ;store x[2(i+n2)+1]
B_END:
****** END Benchmark Timing ***
;RESTORE C RUNTIME ENVIRONMENT
; Pops the saved registers back off the stack, reloads the saved CSR value
; from csr_stack, and returns through B3.
; B15 walks the B-side saves and A0 (a copy of B15 offset by 4) walks the
; A-side saves; after the first two B-side pops each pointer steps by 2 per
; pop, so the two sides are read from alternating slots.
LDW .D2 *--B15[1], B3 ;**pop B3 from stack (return address used by the branch below)
|| SUB .S1X B15, 4, A0 ;copy SP to A-side (A0 = B15 - 4)
LDW .D2 *--B15[1], B10 ;**pop B10 from stack
|| LDW .D1 *--A0[2], A10 ;**pop A10 from stack
LDW .D2 *--B15[2], B11 ;**pop B11 from stack
|| LDW .D1 *--A0[2], A11 ;**pop A11 from stack
LDW .D2 *--B15[2], B12 ;**pop B12 from stack
|| LDW .D1 *--A0[2], A12 ;**pop A12 from stack
LDW .D2 *--B15[2], B13 ;**pop B13 from stack
|| LDW .D1 *--A0[2], A13 ;**pop A13 from stack
LDW .D2 *--B15[2], B14 ;**pop B14 from stack
|| LDW .D1 *--A0[2], A14 ;**pop A14 from stack
LDW .D2 *--B15[2], B15 ;**pop B15 from stack (B15 itself is overwritten by the loaded value)
|| LDW .D1 *--A0[2], A15 ;**pop A15 from stack
MVKL .S2 csr_stack, B1 ;B1 = address of csr_stack (low half)
MVKH .S2 csr_stack, B1 ;B1 = address of csr_stack (high half)
;Re-enable interrupts
LDW .D2 *B1, B9 ;B9 = word saved at csr_stack
B .S2 B3 ;return to calling function
; NOTE(review): NOP 3 plus the MVC below cover four packets after the
; branch. If the branch has five delay slots on this core, the packet that
; follows MVC in memory also executes before the return - confirm.
NOP 3
MVC .S2 B9, CSR ; restore interrupts (write the saved value back to CSR)
*******************************************************************************
* END OF r2fft32 *
*******************************************************************************
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -