📄 zgemv_t_ppc440.s
字号:
LL(15): andi. r0, M, 2 ble LL(17) LFDU a1, 1 * SIZE(AO1) LFDU b1, 1 * SIZE(X1) LFDU a2, 1 * SIZE(AO1) LFDU b2, 1 * SIZE(X1) LFDU a3, 1 * SIZE(AO2) LFDU b3, 1 * SIZE(X1) LFDU a4, 1 * SIZE(AO2) LFDU b4, 1 * SIZE(X1) FMADD1 y1, a1, b1, y1 LFDU a5, 1 * SIZE(AO3) FMADD2 y2, a1, b2, y2 LFDU a6, 1 * SIZE(AO3) FMADD1 y3, a3, b1, y3 LFDU a7, 1 * SIZE(AO4) FMADD2 y4, a3, b2, y4 LFDU a8, 1 * SIZE(AO4) FMADD3 y1, a2, b2, y1 LFDU a1, 1 * SIZE(AO1) FMADD4 y2, a2, b1, y2 LFDU a2, 1 * SIZE(AO1) FMADD3 y3, a4, b2, y3 LFDU a3, 1 * SIZE(AO2) FMADD4 y4, a4, b1, y4 LFDU a4, 1 * SIZE(AO2) FMADD1 y5, a5, b1, y5 FMADD2 y6, a5, b2, y6 FMADD1 y7, a7, b1, y7 FMADD2 y8, a7, b2, y8 FMADD3 y5, a6, b2, y5 LFDU a5, 1 * SIZE(AO3) FMADD4 y6, a6, b1, y6 LFDU a6, 1 * SIZE(AO3) FMADD3 y7, a8, b2, y7 LFDU a7, 1 * SIZE(AO4) FMADD4 y8, a8, b1, y8 LFDU a8, 1 * SIZE(AO4) FMADD1 y1, a1, b3, y1 FMADD2 y2, a1, b4, y2 FMADD1 y3, a3, b3, y3 FMADD2 y4, a3, b4, y4 FMADD3 y1, a2, b4, y1 FMADD4 y2, a2, b3, y2 FMADD3 y3, a4, b4, y3 FMADD4 y4, a4, b3, y4 FMADD1 y5, a5, b3, y5 FMADD2 y6, a5, b4, y6 FMADD1 y7, a7, b3, y7 FMADD2 y8, a7, b4, y8 FMADD3 y5, a6, b4, y5 FMADD4 y6, a6, b3, y6 FMADD3 y7, a8, b4, y7 FMADD4 y8, a8, b3, y8 .align 4LL(17): andi. 
r0, M, 1 ble LL(19) LFDU a1, 1 * SIZE(AO1) LFDU a2, 1 * SIZE(AO1) LFDU a3, 1 * SIZE(AO2) LFDU a4, 1 * SIZE(AO2) LFDU a5, 1 * SIZE(AO3) LFDU a6, 1 * SIZE(AO3) LFDU a7, 1 * SIZE(AO4) LFDU a8, 1 * SIZE(AO4) LFDU b1, 1 * SIZE(X1) LFDU b2, 1 * SIZE(X1) FMADD1 y1, a1, b1, y1 FMADD2 y2, a1, b2, y2 FMADD1 y3, a3, b1, y3 FMADD2 y4, a3, b2, y4 FMADD3 y1, a2, b2, y1 FMADD4 y2, a2, b1, y2 FMADD3 y3, a4, b2, y3 FMADD4 y4, a4, b1, y4 FMADD1 y5, a5, b1, y5 FMADD2 y6, a5, b2, y6 FMADD1 y7, a7, b1, y7 FMADD2 y8, a7, b2, y8 FMADD3 y5, a6, b2, y5 FMADD4 y6, a6, b1, y6 FMADD3 y7, a8, b2, y7 FMADD4 y8, a8, b1, y8 .align 4LL(19): LFDUX b1, Y, INCY LFDU b2, 1 * SIZE(Y) LFDUX b3, Y, INCY LFDU b4, 1 * SIZE(Y) LFDUX b5, Y, INCY LFDU b6, 1 * SIZE(Y) LFDUX b7, Y, INCY LFDU b8, 1 * SIZE(Y) FMADD b1, alpha_r, y1, b1 FMADDR b2, alpha_r, y2, b2 FMADD b3, alpha_r, y3, b3 FMADDR b4, alpha_r, y4, b4 FMADD b5, alpha_r, y5, b5 FMADDR b6, alpha_r, y6, b6 FMADD b7, alpha_r, y7, b7 FMADDR b8, alpha_r, y8, b8 FMSUBR b1, alpha_i, y2, b1 FMADD b2, alpha_i, y1, b2 FMSUBR b3, alpha_i, y4, b3 FMADD b4, alpha_i, y3, b4 FMSUBR b5, alpha_i, y6, b5 FMADD b6, alpha_i, y5, b6 FMSUBR b7, alpha_i, y8, b7 FMADD b8, alpha_i, y7, b8 STFDUX b1, YY, INCY STFDU b2, 1 * SIZE(YY) STFDUX b3, YY, INCY STFDU b4, 1 * SIZE(YY) STFDUX b5, YY, INCY STFDU b6, 1 * SIZE(YY) STFDUX b7, YY, INCY STFDU b8, 1 * SIZE(YY) addi J, J, -1 cmpwi cr0, J, 0 bgt LL(11) .align 4 LL(20): andi. J, N, 2 ble LL(30) lfd y1, FZERO mr AO1, A fmr y2, y1 mr X1, XP fmr y3, y1 add AO2, A, LDA fmr y4, y1 add A, AO2, LDA srawi. 
r0, M, 2 mtspr CTR, r0 ble LL(25) LFDU a1, 1 * SIZE(AO1) LFDU b1, 1 * SIZE(X1) LFDU a2, 1 * SIZE(AO1) LFDU b2, 1 * SIZE(X1) LFDU a3, 1 * SIZE(AO2) bdz LL(23) .align 5LL(22): FMADD1 y1, a1, b1, y1 LFDU a4, 1 * SIZE(AO2) FMADD2 y2, a1, b2, y2 LFDU b3, 1 * SIZE(X1) FMADD1 y3, a3, b1, y3 LFDU b4, 1 * SIZE(X1) FMADD2 y4, a3, b2, y4#ifdef PPCG4 dcbt AO1, PREA#endif FMADD3 y1, a2, b2, y1 LFDU a1, 1 * SIZE(AO1) FMADD4 y2, a2, b1, y2 LFDU a2, 1 * SIZE(AO1) FMADD3 y3, a4, b2, y3 LFDU a3, 1 * SIZE(AO2) FMADD4 y4, a4, b1, y4 LFDU a4, 1 * SIZE(AO2)#ifdef PPCG4 dcbt AO2, PREA#endif FMADD1 y1, a1, b3, y1 LFDU b1, 1 * SIZE(X1) FMADD2 y2, a1, b4, y2 LFDU b2, 1 * SIZE(X1) FMADD1 y3, a3, b3, y3 FMADD2 y4, a3, b4, y4#ifdef PPCG4 dcbt X1, PREA#endif FMADD3 y1, a2, b4, y1 LFDU a1, 1 * SIZE(AO1) FMADD4 y2, a2, b3, y2 LFDU a2, 1 * SIZE(AO1) FMADD3 y3, a4, b4, y3 LFDU a3, 1 * SIZE(AO2) FMADD4 y4, a4, b3, y4 LFDU a4, 1 * SIZE(AO2) FMADD1 y1, a1, b1, y1 LFDU b3, 1 * SIZE(X1) FMADD2 y2, a1, b2, y2 LFDU b4, 1 * SIZE(X1) FMADD1 y3, a3, b1, y3 FMADD2 y4, a3, b2, y4#if defined(PPCG4) && defined(DOUBLE) dcbt AO1, PREA#endif FMADD3 y1, a2, b2, y1 LFDU a1, 1 * SIZE(AO1) FMADD4 y2, a2, b1, y2 LFDU a2, 1 * SIZE(AO1) FMADD3 y3, a4, b2, y3 LFDU a3, 1 * SIZE(AO2) FMADD4 y4, a4, b1, y4 LFDU a4, 1 * SIZE(AO2)#if defined(PPCG4) && defined(DOUBLE) dcbt AO2, PREA#endif FMADD1 y1, a1, b3, y1 LFDU b1, 1 * SIZE(X1) FMADD2 y2, a1, b4, y2 LFDU b2, 1 * SIZE(X1) FMADD1 y3, a3, b3, y3 FMADD2 y4, a3, b4, y4#if defined(PPCG4) && defined(DOUBLE) dcbt X1, PREA#endif FMADD3 y1, a2, b4, y1 LFDU a1, 1 * SIZE(AO1) FMADD4 y2, a2, b3, y2 LFDU a2, 1 * SIZE(AO1) FMADD3 y3, a4, b4, y3 LFDU a3, 1 * SIZE(AO2) FMADD4 y4, a4, b3, y4 bdnz LL(22) .align 4 LL(23): FMADD1 y1, a1, b1, y1 LFDU a4, 1 * SIZE(AO2) FMADD2 y2, a1, b2, y2 LFDU b3, 1 * SIZE(X1) FMADD1 y3, a3, b1, y3 LFDU b4, 1 * SIZE(X1) FMADD2 y4, a3, b2, y4 FMADD3 y1, a2, b2, y1 LFDU a1, 1 * SIZE(AO1) FMADD4 y2, a2, b1, y2 LFDU a2, 1 * SIZE(AO1) FMADD3 y3, a4, b2, y3 LFDU a3, 1 
* SIZE(AO2) FMADD4 y4, a4, b1, y4 LFDU a4, 1 * SIZE(AO2) FMADD1 y1, a1, b3, y1 LFDU b1, 1 * SIZE(X1) FMADD2 y2, a1, b4, y2 LFDU b2, 1 * SIZE(X1) FMADD1 y3, a3, b3, y3 FMADD2 y4, a3, b4, y4 FMADD3 y1, a2, b4, y1 LFDU a1, 1 * SIZE(AO1) FMADD4 y2, a2, b3, y2 LFDU a2, 1 * SIZE(AO1) FMADD3 y3, a4, b4, y3 LFDU a3, 1 * SIZE(AO2) FMADD4 y4, a4, b3, y4 LFDU a4, 1 * SIZE(AO2) FMADD1 y1, a1, b1, y1 LFDU b3, 1 * SIZE(X1) FMADD2 y2, a1, b2, y2 LFDU b4, 1 * SIZE(X1) FMADD1 y3, a3, b1, y3 FMADD2 y4, a3, b2, y4 FMADD3 y1, a2, b2, y1 LFDU a1, 1 * SIZE(AO1) FMADD4 y2, a2, b1, y2 LFDU a2, 1 * SIZE(AO1) FMADD3 y3, a4, b2, y3 LFDU a3, 1 * SIZE(AO2) FMADD4 y4, a4, b1, y4 LFDU a4, 1 * SIZE(AO2) FMADD1 y1, a1, b3, y1 FMADD2 y2, a1, b4, y2 FMADD1 y3, a3, b3, y3 FMADD2 y4, a3, b4, y4 FMADD3 y1, a2, b4, y1 FMADD4 y2, a2, b3, y2 FMADD3 y3, a4, b4, y3 FMADD4 y4, a4, b3, y4 .align 4LL(25): andi. r0, M, 2 ble LL(27) LFDU a1, 1 * SIZE(AO1) LFDU b1, 1 * SIZE(X1) LFDU a2, 1 * SIZE(AO1) LFDU b2, 1 * SIZE(X1) LFDU a3, 1 * SIZE(AO2) LFDU b3, 1 * SIZE(X1) LFDU a4, 1 * SIZE(AO2) LFDU b4, 1 * SIZE(X1) FMADD1 y1, a1, b1, y1 FMADD2 y2, a1, b2, y2 FMADD1 y3, a3, b1, y3 FMADD2 y4, a3, b2, y4 FMADD3 y1, a2, b2, y1 LFDU a1, 1 * SIZE(AO1) FMADD4 y2, a2, b1, y2 LFDU a2, 1 * SIZE(AO1) FMADD3 y3, a4, b2, y3 LFDU a3, 1 * SIZE(AO2) FMADD4 y4, a4, b1, y4 LFDU a4, 1 * SIZE(AO2) FMADD1 y1, a1, b3, y1 FMADD2 y2, a1, b4, y2 FMADD1 y3, a3, b3, y3 FMADD2 y4, a3, b4, y4 FMADD3 y1, a2, b4, y1 FMADD4 y2, a2, b3, y2 FMADD3 y3, a4, b4, y3 FMADD4 y4, a4, b3, y4 .align 4LL(27): andi. 
r0, M, 1 ble LL(29) LFDU a1, 1 * SIZE(AO1) LFDU a2, 1 * SIZE(AO1) LFDU a3, 1 * SIZE(AO2) LFDU a4, 1 * SIZE(AO2) LFDU b1, 1 * SIZE(X1) LFDU b2, 1 * SIZE(X1) FMADD1 y1, a1, b1, y1 FMADD2 y2, a1, b2, y2 FMADD1 y3, a3, b1, y3 FMADD2 y4, a3, b2, y4 FMADD3 y1, a2, b2, y1 FMADD4 y2, a2, b1, y2 FMADD3 y3, a4, b2, y3 FMADD4 y4, a4, b1, y4 .align 4LL(29): LFDUX b1, Y, INCY LFDU b2, 1 * SIZE(Y) LFDUX b3, Y, INCY LFDU b4, 1 * SIZE(Y) FMADD b1, alpha_r, y1, b1 FMADDR b2, alpha_r, y2, b2 FMADD b3, alpha_r, y3, b3 FMADDR b4, alpha_r, y4, b4 FMSUBR b1, alpha_i, y2, b1 FMADD b2, alpha_i, y1, b2 FMSUBR b3, alpha_i, y4, b3 FMADD b4, alpha_i, y3, b4 STFDUX b1, YY, INCY STFDU b2, 1 * SIZE(YY) STFDUX b3, YY, INCY STFDU b4, 1 * SIZE(YY) .align 4LL(30): andi. J, N, 1 ble LL(999) lfd y1, FZERO mr AO1, A fmr y2, y1 mr X1, XP fmr y3, y1 fmr y4, y1 add A, A, LDA srawi. r0, M, 2 mtspr CTR, r0 ble LL(35) LFDU a1, 1 * SIZE(AO1) LFDU b1, 1 * SIZE(X1) LFDU a2, 1 * SIZE(AO1) LFDU b2, 1 * SIZE(X1) bdz LL(33) .align 5LL(32): FMADD1 y1, a1, b1, y1 LFDU b3, 1 * SIZE(X1) FMADD2 y2, a1, b2, y2 LFDU b4, 1 * SIZE(X1)#ifdef PPCG4 dcbt AO1, PREA#endif FMADD3 y3, a2, b2, y3 LFDU a1, 1 * SIZE(AO1) FMADD4 y4, a2, b1, y4 LFDU a2, 1 * SIZE(AO1) FMADD1 y1, a1, b3, y1 LFDU b1, 1 * SIZE(X1) FMADD2 y2, a1, b4, y2 LFDU b2, 1 * SIZE(X1)#ifdef PPCG4 dcbt X1, PREA#endif FMADD3 y3, a2, b4, y3 LFDU a1, 1 * SIZE(AO1) FMADD4 y4, a2, b3, y4 LFDU a2, 1 * SIZE(AO1) FMADD1 y1, a1, b1, y1 LFDU b3, 1 * SIZE(X1) FMADD2 y2, a1, b2, y2 LFDU b4, 1 * SIZE(X1)#if defined(PPCG4) && defined(DOUBLE) dcbt AO1, PREA#endif FMADD3 y3, a2, b2, y3 LFDU a1, 1 * SIZE(AO1) FMADD4 y4, a2, b1, y4 LFDU a2, 1 * SIZE(AO1) FMADD1 y1, a1, b3, y1 LFDU b1, 1 * SIZE(X1) FMADD2 y2, a1, b4, y2 LFDU b2, 1 * SIZE(X1)#if defined(PPCG4) && defined(DOUBLE) dcbt X1, PREA#endif FMADD3 y3, a2, b4, y3 LFDU a1, 1 * SIZE(AO1) FMADD4 y4, a2, b3, y4 LFDU a2, 1 * SIZE(AO1) bdnz LL(32) .align 4 LL(33): FMADD1 y1, a1, b1, y1 LFDU b3, 1 * SIZE(X1) FMADD2 y2, a1, b2, y2 LFDU 
b4, 1 * SIZE(X1) FMADD3 y3, a2, b2, y3 LFDU a1, 1 * SIZE(AO1) FMADD4 y4, a2, b1, y4 LFDU a2, 1 * SIZE(AO1) FMADD1 y1, a1, b3, y1 LFDU b1, 1 * SIZE(X1) FMADD2 y2, a1, b4, y2 LFDU b2, 1 * SIZE(X1) FMADD3 y3, a2, b4, y3 LFDU a1, 1 * SIZE(AO1) FMADD4 y4, a2, b3, y4 LFDU a2, 1 * SIZE(AO1) FMADD1 y1, a1, b1, y1 LFDU b3, 1 * SIZE(X1) FMADD2 y2, a1, b2, y2 LFDU b4, 1 * SIZE(X1) FMADD3 y3, a2, b2, y3 LFDU a1, 1 * SIZE(AO1) FMADD4 y4, a2, b1, y4 LFDU a2, 1 * SIZE(AO1) FMADD1 y1, a1, b3, y1 FMADD2 y2, a1, b4, y2 FMADD3 y3, a2, b4, y3 FMADD4 y4, a2, b3, y4 .align 4LL(35): andi. r0, M, 2 ble LL(37) LFDU a1, 1 * SIZE(AO1) LFDU b1, 1 * SIZE(X1) LFDU a2, 1 * SIZE(AO1) LFDU b2, 1 * SIZE(X1) FMADD1 y1, a1, b1, y1 LFDU b3, 1 * SIZE(X1) FMADD2 y2, a1, b2, y2 LFDU a3, 1 * SIZE(AO1) FMADD3 y3, a2, b2, y3 LFDU b4, 1 * SIZE(X1) FMADD4 y4, a2, b1, y4 LFDU a4, 1 * SIZE(AO1) FMADD1 y1, a3, b3, y1 FMADD2 y2, a3, b4, y2 FMADD3 y3, a4, b4, y3 FMADD4 y4, a4, b3, y4 .align 4LL(37): andi. r0, M, 1 ble LL(39) LFDU a1, 1 * SIZE(AO1) LFDU b1, 1 * SIZE(X1) LFDU a2, 1 * SIZE(AO1) LFDU b2, 1 * SIZE(X1) FMADD1 y1, a1, b1, y1 FMADD2 y2, a1, b2, y2 FMADD3 y3, a2, b2, y3 FMADD4 y4, a2, b1, y4 .align 4LL(39): LFDUX b1, Y, INCY LFDU b2, 1 * SIZE(Y) FADD y1, y1, y3 FADD y2, y2, y4 FMADD b1, alpha_r, y1, b1 FMADDR b2, alpha_r, y2, b2 FMSUBR b1, alpha_i, y2, b1 FMADD b2, alpha_i, y1, b2 STFDUX b1, YY, INCY STFDU b2, 1 * SIZE(YY) .align 4LL(999): li r3, 0 lfd f14, 0(SP) lfd f15, 8(SP) lfd f16, 16(SP) lfd f17, 24(SP) lfd f18, 32(SP) lfd f19, 40(SP) lfd f20, 48(SP) lfd f21, 56(SP) lfd f22, 64(SP) lfd f23, 72(SP) lfd f24, 80(SP) lfd f25, 88(SP) lfd f26, 96(SP) lfd f27, 104(SP) lfd f28, 112(SP) lfd f29, 120(SP) lfd f30, 128(SP) lfd f31, 136(SP)#ifdef __64BIT__ ld r14, 144(SP) ld r15, 152(SP) ld r16, 160(SP) ld r17, 168(SP) ld r18, 176(SP) ld r19, 184(SP) ld r20, 192(SP) ld r21, 200(SP) ld r22, 208(SP)#else lwz r14, 144(SP) lwz r15, 148(SP) lwz r16, 152(SP) lwz r17, 156(SP) lwz r18, 160(SP) lwz r19, 164(SP) lwz r20, 
168(SP) lwz r21, 172(SP) lwz r22, 176(SP)#endif addi SP, SP, STACKSIZE blr EPILOGUE#endif
⌨️ 快捷键说明
- 复制代码:Ctrl + C
- 搜索代码:Ctrl + F
- 全屏模式:F11
- 切换主题:Ctrl + Shift + D
- 显示快捷键:?
- 增大字号:Ctrl + =
- 减小字号:Ctrl + -