📄 main.asm
字号:
|| ADD .L2 B23,B20,B30
|| MV .S2X A9,B20
; Loop setup: reload values kept in the stack frame (word loads at fixed SP
; offsets) and derive the B10..B13 base addresses from the incoming B17 value.
; Every register filled by an LDW below is later used, directly or through a
; copy (A6->A8, A3->A20, A4->B26), as a post-incremented LDH address by the
; software-pipelined loop that follows.
; B10 = B17 - 2 and B11 = B17 + 18; B12 and B13 are formed with ADDAH, whose
; offsets 19 and 29 are scaled as halfword indexes per the C6000 instruction
; set. The loop kernel reads through them with LDH *+Bn[B31].
; NOTE(review): what each stack slot holds is decided by the code that stores
; to these SP offsets - confirm there.
LDW .D2T1 *+SP(80),A6
|| SUB .L2 B17,2,B10
LDW .D2T2 *+SP(84),B29
; A10 and the pair A13:A12 are filled with the non-aligned loads LDNW/LDNDW;
; the loop uses them as MPY/MPYHL operands, A12 and A13 through the copies A27
; and A30. A18 and B17 are replaced by A21 and A8 in the same execute packets
; that still read their old values.
LDNW .D1T1 *A18,A10
|| ADDAH .D2 B17,19,B12
|| MV .L1 A21,A18
ADD .D2 B17,18,B11
|| LDNDW .D1T1 *A30,A13:A12
ADDAH .D2 B17,29,B13
|| MV .L2X A8,B17
; Remaining stack reloads, one LDW per cycle on .D2.
LDW .D2T2 *+SP(124),B16
LDW .D2T1 *+SP(64),A3
LDW .D2T2 *+SP(112),B18
LDW .D2T2 *+SP(72),B22
LDW .D2T2 *+SP(92),B19
LDW .D2T2 *+SP(132),B21
LDW .D2T2 *+SP(128),B24
LDW .D2T1 *+SP(76),A5
LDW .D2T1 *+SP(136),A7
LDW .D2T1 *+SP(108),A16
LDW .D2T1 *+SP(48),A22
LDW .D2T1 *+SP(88),A17
LDW .D2T1 *+SP(68),A19
LDW .D2T1 *+SP(12),A23
LDW .D2T1 *+SP(24),A28
LDW .D2T1 *+SP(16),A25
LDW .D2T1 *+SP(20),A26
LDW .D2T1 *+SP(28),A29
LDW .D2T1 *+SP(104),A4
LDW .D2T1 *+SP(32),A31
LDW .D2T1 *+SP(36),A1
LDW .D2T1 *+SP(44),A2
LDW .D2T1 *+SP(60),A11
LDW .D2T2 *+SP(100),B28
; Entry to the loop at source line 11: load the trip-count constant, save DP,
; CSR and IRP to the stack, clear CSR bit 0 (marked "interrupts off" by the
; original comment) and issue the first loads of iteration 0.
.dwpsn "D:\CCStudio_v3.1\MyProjects\matrix\matrix_mpyc.c",11,13
MVK .L1 0xa,A8 ; |11|
.dwpsn "D:\CCStudio_v3.1\MyProjects\matrix\matrix_mpyc.c",11,9
; A0 = A8 - 2 with A8 holding 0xa from the MVK above; A0 is the counter that
; predicates the kernel's SUB and branch. The MV that overwrites A8 with A6
; sits in the same execute packet as the SUB that reads A8.
; DP is stored at SP(156) and CSR is read into B31, presumably so both can be
; restored after the loop (the restore is not in this part of the file).
; B4 is copied to A21 here because B4 is reused for the masked CSR value next.
MVC .S2 CSR,B31
|| STW .D2T2 DP,*+SP(156)
|| MV .L1X B4,A21
|| SUB .S1 A8,2,A0
|| MV .D1 A6,A8
; The CSR copy goes to SP(164); B4 = CSR copy AND -2, i.e. with bit 0 cleared.
; B7 and A3 are copied to A6 and A20 in this packet; B7 and A3 are used as
; load destinations by the LDH instructions below.
MVC .S2 IRP,B0 ; save irp
|| STW .D2T2 B31,*+SP(164)
|| AND .L2 -2,B31,B4
|| MV .L1X B7,A6
|| MV .S1 A3,A20
; The IRP copy goes to SP(160) while the masked value is written back to CSR.
; B5 is copied to A24 here; B5 is used as a load destination below.
STW .D2T2 B0,*+SP(160)
|| MVC .S2 B4,CSR ; interrupts off
|| MV .L1X B5,A24
; First loads of the pipelined loop; the compiler tags them (P) <0,n>.
LDH .D2T2 *B29++,B7 ; |15| (P) <0,0>
|| LDH .D1T1 *A2++,A14 ; |15| (P) <0,9>
LDH .D2T2 *B28++,B5 ; |15| (P) <0,10>
|| LDH .D1T1 *A11++,A3 ; |15| (P) <0,10>
DW$L$_matrix_mpyc$3$E:
;*----------------------------------------------------------------------------*
;* SOFTWARE PIPELINE INFORMATION
;*
;* Loop source line : 11
;* Loop opening brace source line : 12
;* Loop closing brace source line : 21
;* Known Minimum Trip Count : 10
;* Known Maximum Trip Count : 10
;* Known Max Trip Count Factor : 10
;* Loop Carried Dependency Bound(^) : 13
;* Unpartitioned Resource Bound : 17
;* Partitioned Resource Bound(*) : 17
;* Resource Partition:
;* A-side B-side
;* .L units 0 1
;* .S units 1 0
;* .D units 17* 16
;* .M units 16 14
;* .X cross paths 3 17*
;* .T address paths 17* 16
;* Long read paths 0 0
;* Long write paths 0 0
;* Logical ops (.LS) 0 0 (.L or .S unit)
;* Addition ops (.LSD) 4 28 (.L or .S or .D unit)
;* Bound(.L .S .LS) 1 1
;* Bound(.L .S .D .LS .LSD) 8 15
;*
;* Searching for software pipeline schedule at ...
;* ii = 17 Cannot allocate machine registers
;* Regs Live Always : 26/24 (A/B-side)
;* Max Regs Live : 32/33
;* Max Cond Regs Live : 1/1
;* ii = 17 Schedule found with 3 iterations in parallel
;* Done
;*
;* Epilog not removed
;* Collapsed epilog stages : 0
;*
;* Prolog not removed
;* Collapsed prolog stages : 0
;*
;* Minimum required memory pad : 0 bytes
;*
;* For further improvement on this loop, try option -mh4
;*
;* Minimum safe trip count : 3
;*----------------------------------------------------------------------------*
L3: ; PIPED LOOP PROLOG
; Pipeline fill for the kernel at L4: only instructions tagged <0,n> and <1,n>
; (first and second loop iteration) are issued here; the kernel adds the <2,n>
; ones. The (P) marker and the <iteration,cycle> tags are compiler annotations.
; From here on SP and DP are used as ordinary product/addend registers: SP is
; copied to IRP below, and DP was stored to the stack before the loop.
; NOTE(review): this appears to rely on the earlier CSR write keeping
; interrupts disabled while SP/DP hold data - confirm that SP, DP, IRP and CSR
; are restored after the loop.
LDH .D1T1 *A1++,A3 ; |15| (P) <0,11>
; B26 receives the pointer held in A4 so that .D2 can load through it; A4 is
; used as a load destination further down.
MV .L2X A4,B26
; A27 keeps the old A12 as a multiplier operand; A12 is the destination of the
; LDH in the same packet.
MV .L1 A12,A27
|| LDH .D2T2 *B26++,B0 ; |15| (P) <0,13>
|| LDH .D1T1 *A16++,A12 ; |15| (P) <0,13>
; SP is copied to IRP in the same packet whose MPY starts using SP as a
; destination register.
MVC .S2 SP,IRP ; save sp
|| LDH .D1T1 *A7++,A15 ; |15| (P) <0,14>
|| MPY .M2X A10,B7,SP ; |15| (P) <0,5>
LDH .D1T1 *A31++,A4 ; |15| (P) <0,15>
|| MPYHL .M1 A10,A3,A3 ; |15| (P) <0,15>
|| MPYHL .M2 B27,B5,DP ; |15| (P) <0,15>
; A30 keeps the old A13 as a multiplier operand; A13 is the destination of the
; LDH in the same packet.
MV .L1 A13,A30
|| LDH .D1T1 *A29++,A13 ; |15| (P) <0,16>
|| MPY .M1X B27,A14,A14 ; |15| (P) <0,16>
|| MPY .M2X B25,A3,SP ; |15| (P) <0,16>
; Start of the running sum for iteration 0: products are added one per cycle,
; first into A3, then across to the B side (B5, B4, B6).
LDH .D1T1 *A28++,A3 ; |15| (P) <0,17>
|| ADD .L1X SP,A3,A3 ; |15| (P) <0,17>
|| LDH .D2T2 *B29++,B7 ; |15| (P) <1,0>
ADD .L1X DP,A3,A3 ; |15| (P) <0,18>
|| LDH .D1T1 *A26++,A13 ; |15| (P) <0,18>
MPYHL .M1 A6,A15,A4 ; |15| (P) <0,19>
|| MPYHL .M2 B25,B0,B5 ; |15| (P) <0,19>
|| ADD .L1 A14,A3,A3 ; |15| (P) <0,19>
|| LDH .D1T1 *A25++,A14 ; |15| (P) <0,19>
MPYHL .M1 A30,A4,A14 ; |15| (P) <0,20>
|| LDH .D1T1 *A23++,A14 ; |15| (P) <0,20>
LDH .D1T1 *A22++,A14 ; |15| (P) <0,21>
|| ADD .L2X B5,A3,B5 ; |15| (P) <0,21>
|| MPY .M1 A30,A13,A3 ; |15| (P) <0,21>
MPYHL .M1 A27,A3,A13 ; |15| (P) <0,22>
|| ADD .L2 SP,B5,B5 ; |15| (P) <0,22>
|| MPY .M2X A10,B7,SP ; |15| (P) <1,5>
|| LDH .D1T1 *A20++,A14 ; |15| (P) <0,22>
MPY .M1 A27,A13,A3 ; |15| (P) <0,23>
|| ADD .L2X A14,B5,B5 ; |15| (P) <0,23>
|| LDH .D1T1 *A19++,A14 ; |15| (P) <0,23>
LDH .D1T1 *A17++,A14 ; |15| (P) <0,24>
|| ADD .L2X A3,B5,B4 ; |15| (P) <0,24>
|| MPYHL .M1 A24,A14,A13 ; |15| (P) <0,24>
LDH .D1T1 *A8++,A14 ; |15| (P) <0,25>
|| MPY .M1 A24,A14,A3 ; |15| (P) <0,25>
|| ADD .L2X A13,B4,B4 ; |15| (P) <0,25>
|| LDH .D2T2 *B21++,B7 ; |15| (P) <0,25>
; B23 keeps the old B6 as a multiplier operand for the kernel; B6 is the
; destination of the .D2 load in the same packet.
MV .L2 B6,B23
|| MPYHL .M1 A21,A14,A13 ; |15| (P) <0,26>
|| ADD .S2X A3,B4,B4 ; |15| (P) <0,26>
|| LDH .D1T1 *A2++,A14 ; |15| (P) <1,9>
|| LDH .D2T2 *B22++,B6 ; |15| (P) <0,26>
ADD .L2X A13,B4,B4 ; |15| (P) <0,27>
|| LDH .D1T1 *A11++,A3 ; |15| (P) <1,10>
|| LDH .D2T2 *B28++,B5 ; |15| (P) <1,10>
|| MPY .M1 A21,A14,A3 ; |15| (P) <0,27>
MPYHL .M1 A18,A14,A13 ; |15| (P) <0,28>
|| ADD .L2X A3,B4,B5 ; |15| (P) <0,28>
|| LDH .D1T1 *A1++,A3 ; |15| (P) <1,11>
|| LDH .D2T2 *B24++,B5 ; |15| (P) <0,28>
LDH .D2T2 *B19++,B0 ; |15| (P) <0,29>
|| MPY .M1 A18,A14,A15 ; |15| (P) <0,29>
|| ADD .L2X A13,B5,B6 ; |15| (P) <0,29>
|| LDH .D1T1 *A5++,A12 ; |15| (P) <0,29>
ADD .L2X A3,B6,B4 ; |15| (P) <0,30>
|| LDH .D1T1 *A16++,A12 ; |15| (P) <1,13>
|| LDH .D2T2 *B26++,B0 ; |15| (P) <1,13>
|| MPY .M1 A9,A14,A13 ; |15| (P) <0,30>
ADD .L2X A13,B4,B4 ; |15| (P) <0,31>
|| LDH .D1T1 *A7++,A15 ; |15| (P) <1,14>
|| MPYHL .M1 A9,A12,A15 ; |15| (P) <0,31>
ADD .L2X A15,B4,B4 ; |15| (P) <0,32>
|| LDH .D1T1 *A31++,A4 ; |15| (P) <1,15>
|| MPYHL .M1 A10,A3,A3 ; |15| (P) <1,15>
|| MPYHL .M2 B27,B5,DP ; |15| (P) <1,15>
; B31 = 1 is the index register for the kernel's LDH *+Bn[B31] loads through
; B10..B13.
MVK .L2 0x1,B31 ; |11|
|| LDH .D1T1 *A29++,A13 ; |15| (P) <1,16>
|| MPY .M1X B27,A14,A14 ; |15| (P) <1,16>
|| MPY .M2X B25,A3,SP ; |15| (P) <1,16>
;** --------------------------------------------------------------------------*
L4: ; PIPED LOOP KERNEL
DW$L$_matrix_mpyc$5$B:
MPY .M2 B20,B0,B4 ; |15| <0,34>
|| MPY .M1 A6,A12,A13 ; |15| <0,34>
|| ADD .L2X A15,B4,B4 ; |15| <0,34>
|| LDH .D1T1 *A28++,A3 ; |15| <1,17>
|| ADD .L1X SP,A3,A3 ; |15| <1,17>
|| LDH .D2T2 *B29++,B7 ; |15| <2,0>
LDH .D2T2 *B18++,B7 ; |15| <0,35>
|| MPYHL .M2 B23,B5,B0 ; |15| <0,35>
|| ADD .L2X A13,B4,B5 ; |15| <0,35>
|| LDH .D1T1 *A26++,A13 ; |15| <1,18>
|| ADD .L1X DP,A3,A3 ; |15| <1,18>
LDH .D2T2 *B16++,B6 ; |15| <0,36>
|| ADD .L2X A4,B5,B5 ; |15| <0,36>
|| LDH .D1T1 *A25++,A14 ; |15| <1,19>
|| MPYHL .M1 A6,A15,A4 ; |15| <1,19>
|| MPYHL .M2 B25,B0,B5 ; |15| <1,19>
|| ADD .L1 A14,A3,A3 ; |15| <1,19>
LDH .D2T2 *+B13[B31],B6 ; |15| <0,37> ^
|| MPY .M2 B23,B6,B6 ; |15| <0,37>
|| ADD .L2X A13,B5,B6 ; |15| <0,37>
|| LDH .D1T1 *A23++,A14 ; |15| <1,20>
|| MPYHL .M1 A30,A4,A14 ; |15| <1,20>
LDH .D2T2 *+B12[B31],B6 ; |15| <0,38>
|| MPYHL .M2 B20,B7,B6 ; |15| <0,38>
|| ADD .S2 B0,B6,B0 ; |15| <0,38>
|| LDH .D1T1 *A22++,A14 ; |15| <1,21>
|| MPY .M1 A30,A13,A3 ; |15| <1,21>
|| ADD .L2X B5,A3,B5 ; |15| <1,21>
LDH .D2T2 *+B11[B31],B5 ; |15| <0,39>
|| ADD .S2 B6,B0,B0 ; |15| <0,39>
|| LDH .D1T1 *A20++,A14 ; |15| <1,22>
|| MPYHL .M1 A27,A3,A13 ; |15| <1,22>
|| ADD .L2 SP,B5,B5 ; |15| <1,22>
|| MPY .M2X A10,B7,SP ; |15| <2,5>
LDH .D2T2 *+B10[B31],B6 ; |15| <0,40>
|| MPYHL .M2 B17,B7,B5 ; |15| <0,40>
|| ADD .S2 B6,B0,B7 ; |15| <0,40>
|| LDH .D1T1 *A19++,A14 ; |15| <1,23>
|| MPY .M1 A27,A13,A3 ; |15| <1,23>
|| ADD .L2X A14,B5,B5 ; |15| <1,23>
MPY .M2 B17,B6,B5 ; |15| <0,41>
|| ADD .S2 B4,B7,B7 ; |15| <0,41>
|| LDH .D1T1 *A17++,A14 ; |15| <1,24>
|| MPYHL .M1 A24,A14,A13 ; |15| <1,24>
|| ADD .L2X A3,B5,B4 ; |15| <1,24>
MPYHL .M2 B9,B6,B6 ; |15| <0,42> ^
|| ADD .S2 B5,B7,B7 ; |15| <0,42>
|| LDH .D2T2 *B21++,B7 ; |15| <1,25>
|| LDH .D1T1 *A8++,A14 ; |15| <1,25>
|| MPY .M1 A24,A14,A3 ; |15| <1,25>
|| ADD .L2X A13,B4,B4 ; |15| <1,25>
MPY .M2 B9,B6,B5 ; |15| <0,43>
|| ADD .S2 B5,B7,B7 ; |15| <0,43>
|| LDH .D2T2 *B22++,B6 ; |15| <1,26>
|| MPYHL .M1 A21,A14,A13 ; |15| <1,26>
|| ADD .L2X A3,B4,B4 ; |15| <1,26>
|| LDH .D1T1 *A2++,A14 ; |15| <2,9>
[ A0] SUB .L1 A0,1,A0 ; |11| <0,44>
|| MPYHL .M2 B8,B5,B6 ; |15| <0,44>
|| ADD .S2 B6,B7,B7 ; |15| <0,44> ^
|| MPY .M1 A21,A14,A3 ; |15| <1,27>
|| ADD .L2X A13,B4,B4 ; |15| <1,27>
|| LDH .D1T1 *A11++,A3 ; |15| <2,10>
|| LDH .D2T2 *B28++,B5 ; |15| <2,10>
[ A0] B .S1 L4 ; |11| <0,45>
|| MPY .M2 B8,B6,B5 ; |15| <0,45>
|| ADD .S2 B5,B7,B4 ; |15| <0,45> ^
|| LDH .D2T2 *B24++,B5 ; |15| <1,28>
|| MPYHL .M1 A18,A14,A13 ; |15| <1,28>
|| ADD .L2X A3,B4,B5 ; |15| <1,28>
|| LDH .D1T1 *A1++,A3 ; |15| <2,11>
ADD .S2 B6,B4,B4 ; |15| <0,46> ^
|| LDH .D1T1 *A5++,A12 ; |15| <1,29>
|| LDH .D2T2 *B19++,B0 ; |15| <1,29>
|| MPY .M1 A18,A14,A15 ; |15| <1,29>
|| ADD .L2X A13,B5,B6 ; |15| <1,29>
ADD .S2 B5,B4,DP ; |15| <0,47> ^
|| MPY .M1 A9,A14,A13 ; |15| <1,30>
|| ADD .L2X A3,B6,B4 ; |15| <1,30>
|| LDH .D1T1 *A16++,A12 ; |15| <2,13>
|| LDH .D2T2 *B26++,B0 ; |15| <2,13>
STW .D2T2 DP,*B30++ ; |19| <0,48>
|| CMPGT .L2 DP,B3,B0 ; <0,48> ^
|| MPYHL .M1 A9,A12,A15 ; |15| <1,31>
|| ADD .S2X A13,B4,B4 ; |15| <1,31>
|| LDH .D1T1 *A7++,A15 ; |15| <2,14>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -