📄 gemm_ncopy_hummer_8.s
字号:
/*
 * Tail of the packing routine visible in this excerpt:
 *   .L112-.L119  panel that reads through eight source pointers AO1..AO8
 *   .L120-.L127  panel that reads through four source pointers  (bit 2 of N)
 *   .L130-.L137  panel that reads through two source pointers   (bit 1 of N)
 *   .L140-.L147  panel that reads through one source pointer    (bit 0 of N)
 *   .L999        register restore and return
 *
 * Pattern shared by the multi-pointer sections: a register cNN is written
 * once by LFDUX from an odd-numbered pointer and once by LFSDUX from the
 * following even-numbered pointer, then written out through B by STFPDUX.
 * LFDUX / LFSDUX / STFPDUX / LFDX / STFDX and the cNN, AOn, INC, INC2 names
 * are macros defined outside this excerpt.
 * NOTE(review): presumably these are the primary-half load, secondary-half
 * load and paired store of the double FPU, all with pointer update -- confirm
 * against the macro definitions.
 *
 * The instruction order is kept exactly as found; do not reorder.
 */

/* ---- eight pointers, main loop: eight elements per pointer per pass ----
 * CTR, AO1..AO8 and the outer loop head .L111 are set up before this
 * excerpt. */
.L112:
	/* AO1 -> first halves of c01, c05, ..., c29 */
	LFDUX	c01, AO1, INC
	LFDUX	c05, AO1, INC
	LFDUX	c09, AO1, INC
	LFDUX	c13, AO1, INC
	LFDUX	c17, AO1, INC
	LFDUX	c21, AO1, INC
	LFDUX	c25, AO1, INC
	LFDUX	c29, AO1, INC

	/* AO2 -> second halves of the same registers */
	LFSDUX	c01, AO2, INC
	LFSDUX	c05, AO2, INC
	LFSDUX	c09, AO2, INC
	LFSDUX	c13, AO2, INC
	LFSDUX	c17, AO2, INC
	LFSDUX	c21, AO2, INC
	LFSDUX	c25, AO2, INC
	LFSDUX	c29, AO2, INC

	/* AO3 / AO4 -> c02, c06, ..., c30 */
	LFDUX	c02, AO3, INC
	LFDUX	c06, AO3, INC
	LFDUX	c10, AO3, INC
	LFDUX	c14, AO3, INC
	LFDUX	c18, AO3, INC
	LFDUX	c22, AO3, INC
	LFDUX	c26, AO3, INC
	LFDUX	c30, AO3, INC

	LFSDUX	c02, AO4, INC
	LFSDUX	c06, AO4, INC
	LFSDUX	c10, AO4, INC
	LFSDUX	c14, AO4, INC
	LFSDUX	c18, AO4, INC
	LFSDUX	c22, AO4, INC
	LFSDUX	c26, AO4, INC
	LFSDUX	c30, AO4, INC

	/* AO5 / AO6 -> c03, c07, ..., c31 */
	LFDUX	c03, AO5, INC
	LFDUX	c07, AO5, INC
	LFDUX	c11, AO5, INC
	LFDUX	c15, AO5, INC
	LFDUX	c19, AO5, INC
	LFDUX	c23, AO5, INC
	LFDUX	c27, AO5, INC
	LFDUX	c31, AO5, INC

	LFSDUX	c03, AO6, INC
	LFSDUX	c07, AO6, INC
	LFSDUX	c11, AO6, INC
	LFSDUX	c15, AO6, INC
	LFSDUX	c19, AO6, INC
	LFSDUX	c23, AO6, INC
	LFSDUX	c27, AO6, INC
	LFSDUX	c31, AO6, INC

	/* AO7 / AO8 -> c04, c08, ..., c32 */
	LFDUX	c04, AO7, INC
	LFDUX	c08, AO7, INC
	LFDUX	c12, AO7, INC
	LFDUX	c16, AO7, INC
	LFDUX	c20, AO7, INC
	LFDUX	c24, AO7, INC
	LFDUX	c28, AO7, INC
	LFDUX	c32, AO7, INC

	LFSDUX	c04, AO8, INC
	LFSDUX	c08, AO8, INC
	LFSDUX	c12, AO8, INC
	LFSDUX	c16, AO8, INC
	LFSDUX	c20, AO8, INC
	LFSDUX	c24, AO8, INC
	LFSDUX	c28, AO8, INC
	LFSDUX	c32, AO8, INC

	/* Write-out in numeric order: c01..c04 hold the first element taken
	 * from each of AO1..AO8, c05..c08 the second, and so on. */
	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c06, B, INC2
	STFPDUX	c07, B, INC2
	STFPDUX	c08, B, INC2
	STFPDUX	c09, B, INC2
	STFPDUX	c10, B, INC2
	STFPDUX	c11, B, INC2
	STFPDUX	c12, B, INC2
	STFPDUX	c13, B, INC2
	STFPDUX	c14, B, INC2
	STFPDUX	c15, B, INC2
	STFPDUX	c16, B, INC2
	STFPDUX	c17, B, INC2
	STFPDUX	c18, B, INC2
	STFPDUX	c19, B, INC2
	STFPDUX	c20, B, INC2
	STFPDUX	c21, B, INC2
	STFPDUX	c22, B, INC2
	STFPDUX	c23, B, INC2
	STFPDUX	c24, B, INC2
	STFPDUX	c25, B, INC2
	STFPDUX	c26, B, INC2
	STFPDUX	c27, B, INC2
	STFPDUX	c28, B, INC2
	STFPDUX	c29, B, INC2
	STFPDUX	c30, B, INC2
	STFPDUX	c31, B, INC2
	STFPDUX	c32, B, INC2
	bdnz	.L112		/* decrement CTR, loop while non-zero */
	.align 4

/* ---- eight pointers, remainder: M & 7 elements per pointer ---- */
.L115:
	andi.	r0, M, 7	/* result is 0..7, so ble == "is zero" */
	ble	.L119

	andi.	r0, M, 4	/* four more elements per pointer? */
	beq	.L116

	LFDUX	c01, AO1, INC
	LFDUX	c05, AO1, INC
	LFDUX	c09, AO1, INC
	LFDUX	c13, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c05, AO2, INC
	LFSDUX	c09, AO2, INC
	LFSDUX	c13, AO2, INC

	LFDUX	c02, AO3, INC
	LFDUX	c06, AO3, INC
	LFDUX	c10, AO3, INC
	LFDUX	c14, AO3, INC

	LFSDUX	c02, AO4, INC
	LFSDUX	c06, AO4, INC
	LFSDUX	c10, AO4, INC
	LFSDUX	c14, AO4, INC

	LFDUX	c03, AO5, INC
	LFDUX	c07, AO5, INC
	LFDUX	c11, AO5, INC
	LFDUX	c15, AO5, INC

	LFSDUX	c03, AO6, INC
	LFSDUX	c07, AO6, INC
	LFSDUX	c11, AO6, INC
	LFSDUX	c15, AO6, INC

	LFDUX	c04, AO7, INC
	LFDUX	c08, AO7, INC
	LFDUX	c12, AO7, INC
	LFDUX	c16, AO7, INC

	LFSDUX	c04, AO8, INC
	LFSDUX	c08, AO8, INC
	LFSDUX	c12, AO8, INC
	LFSDUX	c16, AO8, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c06, B, INC2
	STFPDUX	c07, B, INC2
	STFPDUX	c08, B, INC2
	STFPDUX	c09, B, INC2
	STFPDUX	c10, B, INC2
	STFPDUX	c11, B, INC2
	STFPDUX	c12, B, INC2
	STFPDUX	c13, B, INC2
	STFPDUX	c14, B, INC2
	STFPDUX	c15, B, INC2
	STFPDUX	c16, B, INC2
	.align 4

.L116:
	andi.	r0, M, 2	/* two more elements per pointer? */
	beq	.L117

	LFDUX	c01, AO1, INC
	LFDUX	c05, AO1, INC
	LFDUX	c02, AO3, INC
	LFDUX	c06, AO3, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c05, AO2, INC
	LFSDUX	c02, AO4, INC
	LFSDUX	c06, AO4, INC

	LFDUX	c03, AO5, INC
	LFDUX	c07, AO5, INC
	LFDUX	c04, AO7, INC
	LFDUX	c08, AO7, INC

	LFSDUX	c03, AO6, INC
	LFSDUX	c07, AO6, INC
	LFSDUX	c04, AO8, INC
	LFSDUX	c08, AO8, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c06, B, INC2
	STFPDUX	c07, B, INC2
	STFPDUX	c08, B, INC2
	.align 4

.L117:
	andi.	r0, M, 1	/* one last element per pointer? */
	beq	.L119

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO3, INC
	LFDUX	c03, AO5, INC
	LFDUX	c04, AO7, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO4, INC
	LFSDUX	c03, AO6, INC
	LFSDUX	c04, AO8, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	.align 4

.L119:
	addic.	J, J, -1	/* one eight-pointer panel done */
	bgt	.L111		/* .L111 lies before this excerpt */
	.align 4

/* ---- four pointers: taken when bit 2 of N is set ---- */
.L120:
	andi.	J, N, 4		/* result is 0 or 4, so ble == "bit clear" */
	ble	.L130
	.align 4

.L121:
	/* AO1..AO4 = A, A+LDA, A+2*LDA, A+3*LDA; A advances by 4*LDA */
	mr	AO1, A
	add	AO2, A,   LDA
	add	AO3, AO2, LDA
	add	AO4, AO3, LDA
	add	A,   AO4, LDA

	srawi.	r0, M, 3	/* trip count = M / 8 */
	mtspr	CTR, r0
	ble	.L125		/* no full pass: go to the remainder */
	.align 4

.L122:
	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC
	LFDUX	c09, AO1, INC
	LFDUX	c10, AO1, INC
	LFDUX	c11, AO1, INC
	LFDUX	c12, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC
	LFSDUX	c03, AO2, INC
	LFSDUX	c04, AO2, INC
	LFSDUX	c09, AO2, INC
	LFSDUX	c10, AO2, INC
	LFSDUX	c11, AO2, INC
	LFSDUX	c12, AO2, INC

	LFDUX	c05, AO3, INC
	LFDUX	c06, AO3, INC
	LFDUX	c07, AO3, INC
	LFDUX	c08, AO3, INC
	LFDUX	c13, AO3, INC
	LFDUX	c14, AO3, INC
	LFDUX	c15, AO3, INC
	LFDUX	c16, AO3, INC

	LFSDUX	c05, AO4, INC
	LFSDUX	c06, AO4, INC
	LFSDUX	c07, AO4, INC
	LFSDUX	c08, AO4, INC
	LFSDUX	c13, AO4, INC
	LFSDUX	c14, AO4, INC
	LFSDUX	c15, AO4, INC
	LFSDUX	c16, AO4, INC

	/* Stores alternate between the AO1/AO2 register and the AO3/AO4
	 * register that were loaded at the same position. */
	STFPDUX	c01, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c06, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c07, B, INC2
	STFPDUX	c04, B, INC2
	STFPDUX	c08, B, INC2
	STFPDUX	c09, B, INC2
	STFPDUX	c13, B, INC2
	STFPDUX	c10, B, INC2
	STFPDUX	c14, B, INC2
	STFPDUX	c11, B, INC2
	STFPDUX	c15, B, INC2
	STFPDUX	c12, B, INC2
	STFPDUX	c16, B, INC2
	bdnz	.L122
	.align 4

.L125:
	andi.	r0, M, 7
	ble	.L130		/* nothing left for this panel */

	andi.	r0, M, 4
	beq	.L126

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC
	LFSDUX	c03, AO2, INC
	LFSDUX	c04, AO2, INC

	LFDUX	c05, AO3, INC
	LFDUX	c06, AO3, INC
	LFDUX	c07, AO3, INC
	LFDUX	c08, AO3, INC

	LFSDUX	c05, AO4, INC
	LFSDUX	c06, AO4, INC
	LFSDUX	c07, AO4, INC
	LFSDUX	c08, AO4, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c06, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c07, B, INC2
	STFPDUX	c04, B, INC2
	STFPDUX	c08, B, INC2
	.align 4

.L126:
	andi.	r0, M, 2
	beq	.L127

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC

	LFDUX	c05, AO3, INC
	LFDUX	c06, AO3, INC
	LFSDUX	c05, AO4, INC
	LFSDUX	c06, AO4, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c06, B, INC2
	.align 4

.L127:
	andi.	r0, M, 1
	beq	.L130

	LFDUX	c01, AO1, INC
	LFDUX	c05, AO3, INC
	/* Two nops sit between the two load groups in the original.
	 * NOTE(review): presumably scheduling padding -- keep as is. */
	nop
	nop
	LFSDUX	c01, AO2, INC
	LFSDUX	c05, AO4, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c05, B, INC2
	.align 4

/* ---- two pointers: taken when bit 1 of N is set ---- */
.L130:
	andi.	J, N, 2		/* result is 0 or 2 */
	ble	.L140

	/* AO1 = A, AO2 = A+LDA; A advances by 2*LDA */
	mr	AO1, A
	add	AO2, A,   LDA
	add	A,   AO2, LDA

	srawi.	r0, M, 3	/* trip count = M / 8 */
	mtspr	CTR, r0
	ble	.L135
	.align 4

.L132:
	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC
	LFDUX	c09, AO1, INC
	LFDUX	c10, AO1, INC
	LFDUX	c11, AO1, INC
	LFDUX	c12, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC
	LFSDUX	c03, AO2, INC
	LFSDUX	c04, AO2, INC
	LFSDUX	c09, AO2, INC
	LFSDUX	c10, AO2, INC
	LFSDUX	c11, AO2, INC
	LFSDUX	c12, AO2, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	STFPDUX	c09, B, INC2
	STFPDUX	c10, B, INC2
	STFPDUX	c11, B, INC2
	STFPDUX	c12, B, INC2
	bdnz	.L132
	.align 4

.L135:
	andi.	r0, M, 7
	ble	.L140

	andi.	r0, M, 4
	beq	.L136

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC

	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC
	LFSDUX	c03, AO2, INC
	LFSDUX	c04, AO2, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c04, B, INC2
	.align 4

.L136:
	andi.	r0, M, 2
	beq	.L137

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFSDUX	c01, AO2, INC
	LFSDUX	c02, AO2, INC

	STFPDUX	c01, B, INC2
	STFPDUX	c02, B, INC2
	.align 4

.L137:
	andi.	r0, M, 1
	beq	.L140

	/* Unlike the sections above, both values arrive through LFDUX and
	 * are combined with fsmfp before the single paired store.
	 * NOTE(review): presumably fsmfp copies c02's primary half into
	 * c01's secondary half -- confirm against the FPU manual. */
	LFDUX	c01, AO1, INC
	LFDUX	c02, AO2, INC
	fsmfp	c01, c02
	STFPDUX	c01, B, INC2
	.align 4

/* ---- one pointer: taken when bit 0 of N is set ---- */
.L140:
	andi.	J, N, 1		/* result is 0 or 1 */
	ble	.L999

	mr	AO1, A
	srawi.	r0, M, 3	/* trip count = M / 8 */
	mtspr	CTR, r0
	ble	.L145
	.align 4

.L142:
	/* Eight consecutive loads from AO1, merged pairwise, four stores. */
	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC
	LFDUX	c05, AO1, INC
	LFDUX	c06, AO1, INC
	LFDUX	c07, AO1, INC
	LFDUX	c08, AO1, INC

	fsmfp	c01, c02
	fsmfp	c03, c04
	fsmfp	c05, c06
	fsmfp	c07, c08

	STFPDUX	c01, B, INC2
	STFPDUX	c03, B, INC2
	STFPDUX	c05, B, INC2
	STFPDUX	c07, B, INC2
	bdnz	.L142
	.align 4

.L145:
	andi.	r0, M, 7
	ble	.L999

	andi.	r0, M, 4
	beq	.L146

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	LFDUX	c03, AO1, INC
	LFDUX	c04, AO1, INC

	fsmfp	c01, c02
	fsmfp	c03, c04

	STFPDUX	c01, B, INC2
	STFPDUX	c03, B, INC2
	.align 4

.L146:
	andi.	r0, M, 2
	beq	.L147

	LFDUX	c01, AO1, INC
	LFDUX	c02, AO1, INC
	fsmfp	c01, c02
	STFPDUX	c01, B, INC2
	.align 4

.L147:
	andi.	r0, M, 1
	beq	.L999

	/* Final single element: the only place that uses LFDX / STFDX
	 * instead of the ...UX macros used everywhere else. */
	LFDX	c01, AO1, INC
	STFDX	c01, B, INC2
	.align 4

/* ---- epilogue: restore r26-r31 and f14-f31, then return ---- */
.L999:
	/* With the +4 adjustment and six pre-incrementing lwzu loads, r26 is
	 * read from (SP at .L999) + 8 and r31 from (SP at .L999) + 28.
	 * NOTE(review): this implies the prologue, which is not visible
	 * here, leaves SP 8 bytes below r26's save slot -- confirm. */
	addi	SP, SP, 4

	lwzu	r26, 4(SP)
	lwzu	r27, 4(SP)
	lwzu	r28, 4(SP)
	lwzu	r29, 4(SP)
	lwzu	r30, 4(SP)
	lwzu	r31, 4(SP)

	/* Step back 12 so that the first SP + 16 access below lands 4 bytes
	 * above the word r31 was just read from. */
	subi	SP, SP, 12
	li	r0, 16

	lfpdux	f31, SP, r0
	lfpdux	f30, SP, r0
	lfpdux	f29, SP, r0
	lfpdux	f28, SP, r0
	lfpdux	f27, SP, r0
	lfpdux	f26, SP, r0
	lfpdux	f25, SP, r0
	lfpdux	f24, SP, r0
	lfpdux	f23, SP, r0
	lfpdux	f22, SP, r0
	lfpdux	f21, SP, r0
	lfpdux	f20, SP, r0
	lfpdux	f19, SP, r0
	lfpdux	f18, SP, r0
	lfpdux	f17, SP, r0
	lfpdux	f16, SP, r0
	lfpdux	f15, SP, r0
	lfpdux	f14, SP, r0

	addi	SP, SP, 16	/* step past the last 16-byte slot */
	blr

	EPILOGUE
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -