📄 gemv_t_ppc440.s
字号:
/*
 * Tail of the kernel: remainder handling for the four-column pass, the
 * two-column pass, the one-column pass and the function epilogue.
 *
 * Every section follows the same pattern:
 *   - values are loaded through the column pointers (AO1..AO4) and through
 *     X1, multiplied pairwise and accumulated into y01..y08;
 *   - at the end of a pass the accumulators are combined, scaled by alpha
 *     and added to values loaded through Y, and the results are stored
 *     through YY.
 *
 * NOTE(review): LFDU, LFDUX, STFDUX, FMADD, FADD, LL(), SIZE, EPILOGUE and
 * all operand names (AO1..AO4, X1, XP, Y, YY, A, LDA, INCY, M, N, J, alpha,
 * a1..a8, b1..b4, y01..y08, ALPHA, FZERO, PREA, CTR, SP, STACKSIZE) are
 * defined outside this chunk.  The comments below assume the upper-case
 * mnemonics map to the like-named PowerPC instructions (update-form loads
 * and stores, fused multiply-add: FMADD d, x, y, z => d = x * y + z)
 * -- TODO confirm against the macro definitions.
 */

/* Continuation of a section whose start lies before this chunk. */
	LFDU	a1, 1 * SIZE(AO1)
	FMADD	y06, a6, b2, y06
	LFDU	a2, 1 * SIZE(AO2)
	FMADD	y07, a7, b2, y07
	LFDU	a3, 1 * SIZE(AO3)
	FMADD	y08, a8, b2, y08
	LFDU	a4, 1 * SIZE(AO4)

	LFDU	b4, 1 * SIZE(X1)

	FMADD	y01, a1, b3, y01
	LFDU	a5, 1 * SIZE(AO1)
	FMADD	y02, a2, b3, y02
	LFDU	a6, 1 * SIZE(AO2)
	FMADD	y03, a3, b3, y03
	LFDU	a7, 1 * SIZE(AO3)
	FMADD	y04, a4, b3, y04
	LFDU	a8, 1 * SIZE(AO4)

	FMADD	y05, a5, b4, y05
	FMADD	y06, a6, b4, y06
	FMADD	y07, a7, b4, y07
	FMADD	y08, a8, b4, y08
	.align 4

/* Four columns: two more X1 elements when bit 1 of M is set. */
LL(26):
	andi.	r0, M, 2
	ble	LL(27)			/* taken when the masked bit is clear */

	LFDU	b1, 1 * SIZE(X1)
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	a2, 1 * SIZE(AO2)
	LFDU	a3, 1 * SIZE(AO3)
	LFDU	a4, 1 * SIZE(AO4)

	LFDU	b2, 1 * SIZE(X1)
	FMADD	y01, a1, b1, y01
	LFDU	a5, 1 * SIZE(AO1)
	FMADD	y02, a2, b1, y02
	LFDU	a6, 1 * SIZE(AO2)
	FMADD	y03, a3, b1, y03
	LFDU	a7, 1 * SIZE(AO3)
	FMADD	y04, a4, b1, y04
	LFDU	a8, 1 * SIZE(AO4)

	FMADD	y05, a5, b2, y05
	FMADD	y06, a6, b2, y06
	FMADD	y07, a7, b2, y07
	FMADD	y08, a8, b2, y08
	.align 4

/* Four columns: one last X1 element when bit 0 of M is set. */
LL(27):
	andi.	r0, M, 1
	ble	LL(28)

	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	a2, 1 * SIZE(AO2)
	LFDU	a3, 1 * SIZE(AO3)
	LFDU	a4, 1 * SIZE(AO4)

	FMADD	y01, a1, b1, y01
	FMADD	y02, a2, b1, y02
	FMADD	y03, a3, b1, y03
	FMADD	y04, a4, b1, y04
	.align 4

/*
 * Four columns: fold y05..y08 into y01..y04, then update four values
 * read through Y and write them back through YY.
 * NOTE(review): the FMADDs name f0..f3 directly; presumably y01..y04 are
 * aliases of f0..f3 -- confirm against the register definitions.
 */
LL(28):
	lfd	alpha, ALPHA

	LFDUX	a1, Y, INCY
	LFDUX	a2, Y, INCY
	LFDUX	a3, Y, INCY
	LFDUX	a4, Y, INCY

	FADD	y01, y05, y01
	FADD	y02, y06, y02
	FADD	y03, y07, y03
	FADD	y04, y08, y04

	FMADD	a1, alpha, f0, a1
	FMADD	a2, alpha, f1, a2
	FMADD	a3, alpha, f2, a3
	FMADD	a4, alpha, f3, a4

	STFDUX	a1, YY, INCY
	addi	J, J, -1		/* one group of four columns done */
	STFDUX	a2, YY, INCY
	cmpi	cr0, 0, J, 0
	STFDUX	a3, YY, INCY
	STFDUX	a4, YY, INCY
	bgt	LL(21)			/* LL(21) is outside this chunk; loop while J > 0 */
	.align 4

/* Two-column pass, executed only when bit 1 of N is set. */
LL(30):
	andi.	J, N, 2
	ble	LL(40)

	mr	AO1, A
	add	AO2, A,   LDA
	add	A,   AO2, LDA		/* A now points past the two columns */

	mr	X1, XP

	lfd	y01, FZERO		/* all four accumulators start from FZERO */
	fmr	y02, y01
	fmr	y03, y01
	fmr	y04, y01

	srawi.	r0, M, 3		/* CTR = M >> 3 unrolled iterations */
	mtspr	CTR, r0
	ble	LL(34)			/* none: go to the remainder code */

	/* Preload the operands consumed by the first part of the loop body. */
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	a2, 1 * SIZE(AO2)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	b2, 1 * SIZE(X1)

	LFDU	a5, 1 * SIZE(AO1)
	LFDU	a6, 1 * SIZE(AO2)
	LFDU	b3, 1 * SIZE(X1)
	LFDU	b4, 1 * SIZE(X1)
	bdz	LL(33)			/* single iteration: only the drain code runs */
	.align 4

/* Two columns: main loop, eight X1 elements per iteration. */
LL(32):
#ifdef PPCG4
	dcbt	X1, PREA
#endif

	FMADD	y01, a1, b1, y01
	LFDU	a1, 1 * SIZE(AO1)
	FMADD	y02, a2, b1, y02
	LFDU	a2, 1 * SIZE(AO2)
	LFDU	b1, 1 * SIZE(X1)

#ifdef PPCG4
	dcbt	AO1, PREA
#endif

	FMADD	y03, a5, b2, y03
	LFDU	a5, 1 * SIZE(AO1)
	FMADD	y04, a6, b2, y04
	LFDU	a6, 1 * SIZE(AO2)
	LFDU	b2, 1 * SIZE(X1)

	FMADD	y01, a1, b3, y01
	LFDU	a1, 1 * SIZE(AO1)
	FMADD	y02, a2, b3, y02
	LFDU	a2, 1 * SIZE(AO2)
	LFDU	b3, 1 * SIZE(X1)

#ifdef PPCG4
	dcbt	AO2, PREA
#endif

	FMADD	y03, a5, b4, y03
	LFDU	a5, 1 * SIZE(AO1)
	FMADD	y04, a6, b4, y04
	LFDU	a6, 1 * SIZE(AO2)
	LFDU	b4, 1 * SIZE(X1)

	FMADD	y01, a1, b1, y01
	LFDU	a1, 1 * SIZE(AO1)
	FMADD	y02, a2, b1, y02
	LFDU	a2, 1 * SIZE(AO2)

#if defined(PPCG4) && defined(DOUBLE)
	dcbt	X1, PREA
#endif

	LFDU	b1, 1 * SIZE(X1)

#if defined(PPCG4) && defined(DOUBLE)
	dcbt	AO1, PREA
#endif

	FMADD	y03, a5, b2, y03
	LFDU	a5, 1 * SIZE(AO1)
	FMADD	y04, a6, b2, y04
	LFDU	a6, 1 * SIZE(AO2)
	LFDU	b2, 1 * SIZE(X1)

	FMADD	y01, a1, b3, y01
	LFDU	a1, 1 * SIZE(AO1)
	FMADD	y02, a2, b3, y02
	LFDU	a2, 1 * SIZE(AO2)
	LFDU	b3, 1 * SIZE(X1)

#if defined(PPCG4) && defined(DOUBLE)
	dcbt	AO2, PREA
#endif

	FMADD	y03, a5, b4, y03
	LFDU	a5, 1 * SIZE(AO1)
	FMADD	y04, a6, b4, y04
	LFDU	a6, 1 * SIZE(AO2)
	LFDU	b4, 1 * SIZE(X1)

	bdnz	LL(32)
	.align 4

/* Two columns: final unrolled group; nothing is preloaded past it. */
LL(33):
	FMADD	y01, a1, b1, y01
	LFDU	a1, 1 * SIZE(AO1)
	FMADD	y02, a2, b1, y02
	LFDU	a2, 1 * SIZE(AO2)
	LFDU	b1, 1 * SIZE(X1)

	FMADD	y03, a5, b2, y03
	LFDU	a5, 1 * SIZE(AO1)
	FMADD	y04, a6, b2, y04
	LFDU	a6, 1 * SIZE(AO2)
	LFDU	b2, 1 * SIZE(X1)

	FMADD	y01, a1, b3, y01
	LFDU	a1, 1 * SIZE(AO1)
	FMADD	y02, a2, b3, y02
	LFDU	a2, 1 * SIZE(AO2)
	LFDU	b3, 1 * SIZE(X1)

	FMADD	y03, a5, b4, y03
	LFDU	a5, 1 * SIZE(AO1)
	FMADD	y04, a6, b4, y04
	LFDU	a6, 1 * SIZE(AO2)
	LFDU	b4, 1 * SIZE(X1)

	FMADD	y01, a1, b1, y01
	LFDU	a1, 1 * SIZE(AO1)
	FMADD	y02, a2, b1, y02
	LFDU	a2, 1 * SIZE(AO2)

	FMADD	y03, a5, b2, y03
	LFDU	a5, 1 * SIZE(AO1)
	FMADD	y04, a6, b2, y04
	LFDU	a6, 1 * SIZE(AO2)

	FMADD	y01, a1, b3, y01
	FMADD	y02, a2, b3, y02
	FMADD	y03, a5, b4, y03
	FMADD	y04, a6, b4, y04
	.align 4

/* Two columns: remainder (M & 7); this part handles the "4" bit. */
LL(34):
	andi.	r0, M, 7
	ble	LL(38)			/* no remainder at all */
	andi.	r0, M, 4
	ble	LL(36)

	LFDU	a1, 1 * SIZE(AO1)
	LFDU	a2, 1 * SIZE(AO2)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	b2, 1 * SIZE(X1)

	FMADD	y01, a1, b1, y01
	LFDU	a5, 1 * SIZE(AO1)
	FMADD	y02, a2, b1, y02
	LFDU	a6, 1 * SIZE(AO2)
	LFDU	b3, 1 * SIZE(X1)

	FMADD	y03, a5, b2, y03
	LFDU	a1, 1 * SIZE(AO1)
	FMADD	y04, a6, b2, y04
	LFDU	a2, 1 * SIZE(AO2)
	LFDU	b4, 1 * SIZE(X1)

	FMADD	y01, a1, b3, y01
	LFDU	a5, 1 * SIZE(AO1)
	FMADD	y02, a2, b3, y02
	LFDU	a6, 1 * SIZE(AO2)

	FMADD	y03, a5, b4, y03
	FMADD	y04, a6, b4, y04
	.align 4

/* Two columns: two more X1 elements when bit 1 of M is set. */
LL(36):
	andi.	r0, M, 2
	ble	LL(37)

	LFDU	b1, 1 * SIZE(X1)
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	a2, 1 * SIZE(AO2)
	LFDU	b2, 1 * SIZE(X1)
	LFDU	a3, 1 * SIZE(AO1)
	LFDU	a4, 1 * SIZE(AO2)

	FMADD	y01, a1, b1, y01
	FMADD	y02, a2, b1, y02
	FMADD	y03, a3, b2, y03
	FMADD	y04, a4, b2, y04
	.align 4

/* Two columns: one last X1 element when bit 0 of M is set. */
LL(37):
	andi.	r0, M, 1
	ble	LL(38)

	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	a2, 1 * SIZE(AO2)

	FMADD	y01, a1, b1, y01
	FMADD	y02, a2, b1, y02
	.align 4

/*
 * Two columns: fold y03/y04 into y01/y02, then update two values read
 * through Y and write them back through YY.
 * NOTE(review): f0/f1 are presumably y01/y02 -- confirm.
 */
LL(38):
	lfd	alpha, ALPHA

	LFDUX	a1, Y, INCY
	LFDUX	a2, Y, INCY

	FADD	y01, y03, y01
	FADD	y02, y04, y02

	FMADD	a1, alpha, f0, a1
	FMADD	a2, alpha, f1, a2

	STFDUX	a1, YY, INCY
	STFDUX	a2, YY, INCY
	.align 4

/* One-column pass, executed only when bit 0 of N is set. */
LL(40):
	andi.	J, N, 1
	ble	LL(999)

	mr	AO1, A
	add	A, A, LDA
	mr	X1, XP

	lfd	y01, FZERO		/* both accumulators start from FZERO */
	fmr	y02, y01

	srawi.	r0, M, 3		/* CTR = M >> 3 unrolled iterations */
	mtspr	CTR, r0
	ble	LL(44)

	/* Preload four column values and four X1 values. */
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	a3, 1 * SIZE(AO1)
	LFDU	a4, 1 * SIZE(AO1)

	LFDU	b1, 1 * SIZE(X1)
	LFDU	b2, 1 * SIZE(X1)
	LFDU	b3, 1 * SIZE(X1)
	LFDU	b4, 1 * SIZE(X1)
	bdz	LL(43)			/* single iteration: only the drain code runs */
	.align 4

/* One column: main loop, eight X1 elements per iteration. */
LL(42):
	FMADD	y01, a1, b1, y01
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)

#ifdef PPCG4
	dcbt	X1, PREA
#endif

	FMADD	y02, a2, b2, y02
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	b2, 1 * SIZE(X1)

#ifdef PPCG4
	dcbt	AO1, PREA
#endif

	FMADD	y01, a3, b3, y01
	LFDU	a3, 1 * SIZE(AO1)
	LFDU	b3, 1 * SIZE(X1)

	FMADD	y02, a4, b4, y02
	LFDU	a4, 1 * SIZE(AO1)
	LFDU	b4, 1 * SIZE(X1)

	FMADD	y01, a1, b1, y01
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)

	FMADD	y02, a2, b2, y02
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	b2, 1 * SIZE(X1)

#if defined(PPCG4) && defined(DOUBLE)
	dcbt	AO1, PREA
#endif

	FMADD	y01, a3, b3, y01
	LFDU	a3, 1 * SIZE(AO1)
	LFDU	b3, 1 * SIZE(X1)

#if defined(PPCG4) && defined(DOUBLE)
	dcbt	X1, PREA
#endif

	FMADD	y02, a4, b4, y02
	LFDU	a4, 1 * SIZE(AO1)
	LFDU	b4, 1 * SIZE(X1)

	bdnz	LL(42)
	.align 4

/* One column: final unrolled group; nothing is preloaded past it. */
LL(43):
	FMADD	y01, a1, b1, y01
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)

	FMADD	y02, a2, b2, y02
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	b2, 1 * SIZE(X1)

	FMADD	y01, a3, b3, y01
	LFDU	a3, 1 * SIZE(AO1)
	LFDU	b3, 1 * SIZE(X1)

	FMADD	y02, a4, b4, y02
	LFDU	a4, 1 * SIZE(AO1)
	LFDU	b4, 1 * SIZE(X1)

	FMADD	y01, a1, b1, y01
	FMADD	y02, a2, b2, y02
	FMADD	y01, a3, b3, y01
	FMADD	y02, a4, b4, y02
	.align 4

/* One column: remainder (M & 7); this part handles the "4" bit. */
LL(44):
	andi.	r0, M, 7
	ble	LL(48)			/* no remainder at all */
	andi.	r0, M, 4
	ble	LL(46)

	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	b2, 1 * SIZE(X1)

	FMADD	y01, a1, b1, y01
	LFDU	a3, 1 * SIZE(AO1)
	LFDU	b3, 1 * SIZE(X1)

	FMADD	y02, a2, b2, y02
	LFDU	a4, 1 * SIZE(AO1)
	LFDU	b4, 1 * SIZE(X1)

	FMADD	y01, a3, b3, y01
	FMADD	y02, a4, b4, y02
	.align 4

/* One column: two more X1 elements when bit 1 of M is set. */
LL(46):
	andi.	r0, M, 2
	ble	LL(47)

	LFDU	b1, 1 * SIZE(X1)
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b2, 1 * SIZE(X1)
	LFDU	a2, 1 * SIZE(AO1)

	FMADD	y01, a1, b1, y01
	FMADD	y02, a2, b2, y02
	.align 4

/* One column: one last X1 element when bit 0 of M is set. */
LL(47):
	andi.	r0, M, 1
	ble	LL(48)

	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)

	FMADD	y01, a1, b1, y01
	.align 4

/*
 * One column: fold y02 into y01, then update one value read through Y
 * and write it back through YY.
 * NOTE(review): f0 is presumably y01 -- confirm.
 */
LL(48):
	lfd	alpha, ALPHA

	LFDUX	a1, Y, INCY

	FADD	y01, y02, y01

	FMADD	a1, alpha, f0, a1

	STFDUX	a1, YY, INCY
	.align 4

/*
 * Common exit: r3 = 0, reload f14-f23 and r14-r22 from the stack frame,
 * release STACKSIZE bytes of stack and return.
 */
LL(999):
	li	r3, 0

	lfd	f14,    0(SP)
	lfd	f15,    8(SP)
	lfd	f16,   16(SP)
	lfd	f17,   24(SP)
	lfd	f18,   32(SP)
	lfd	f19,   40(SP)
	lfd	f20,   48(SP)
	lfd	f21,   56(SP)
	lfd	f22,   64(SP)
	lfd	f23,   72(SP)

#ifdef __64BIT__
	/* 8-byte general-purpose register slots */
	ld	r14,  160(SP)
	ld	r15,  168(SP)
	ld	r16,  176(SP)
	ld	r17,  184(SP)
	ld	r18,  192(SP)
	ld	r19,  200(SP)
	ld	r20,  208(SP)
	ld	r21,  216(SP)
	ld	r22,  224(SP)
#else
	/* 4-byte general-purpose register slots */
	lwz	r14,  160(SP)
	lwz	r15,  164(SP)
	lwz	r16,  168(SP)
	lwz	r17,  172(SP)
	lwz	r18,  176(SP)
	lwz	r19,  180(SP)
	lwz	r20,  184(SP)
	lwz	r21,  188(SP)
	lwz	r22,  192(SP)
#endif

	addi	SP, SP, STACKSIZE
	blr

	EPILOGUE
/* Closes a conditional that was opened before this chunk. */
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -