📄 symv_u.s
字号:
/*
 * NOTE(review): this excerpt starts inside the body of the LL(12) loop.  The
 * loop head, LL(11), the PROLOGUE and every alias used below (SIZE,
 * BASE_SHIFT, ALPHA, FZERO, STACKSIZE, LFD/STFD/FMADD/FMUL/FADD, NOP1/NOP2,
 * a1..a16, xsum1..4, xtemp1..4, atemp1..4, y01..y04, AO1..AO4, XX, YY, IS,
 * M, A, LDA, X, Y, NEW_Y, INCY, TEMP, J) are defined outside the visible
 * text.
 *
 * Layout: one statement per line, labels and `.align` on their own lines,
 * and the preprocessor conditionals starting in column 0 so that they are
 * seen as directives.  Instruction order and operands are unchanged.
 *
 * Recurring pattern in the unrolled groups:
 *     xsumN += xtempK * a..     (four running sums)
 *     y0N   += atempK * a..     (four values that are stored back through YY)
 * Each a.. / xtemp register is reloaded right after its last use in a group.
 * NOP1 / NOP2 are macros from outside this excerpt (presumably scheduling
 * fillers -- confirm against their definition).
 */

/* ---- tail of the LL(12) loop body (head not visible here) ---- */
        LFD     a10, 1 * SIZE(AO3)
        FMADD   xsum3, xtemp3, a11, xsum3
        NOP1
        FMADD   y03, atemp3, a11, y03
        LFD     a11, 2 * SIZE(AO3)
        FMADD   xsum4, xtemp3, a15, xsum4
        LFD     xtemp3, 18 * SIZE(XX)
        FMADD   y04, atemp3, a12, y04
        addi    XX, XX, 16 * SIZE       /* XX advances by 16 elements */

        FMADD   xsum1, xtemp4, a4, xsum1
        LFD     a4, 3 * SIZE(AO1)
        FMADD   y01, atemp4, a13, y01
        LFD     a13, 0 * SIZE(AO4)
        FMADD   xsum2, xtemp4, a8, xsum2
        LFD     a8, 3 * SIZE(AO2)
        FMADD   y02, atemp4, a14, y02
        LFD     a14, 1 * SIZE(AO4)
        FMADD   xsum3, xtemp4, a12, xsum3
        LFD     a12, 3 * SIZE(AO3)
        FMADD   y03, atemp4, a15, y03
        LFD     a15, 2 * SIZE(AO4)
        FMADD   xsum4, xtemp4, a16, xsum4
        LFD     xtemp4, 3 * SIZE(XX)
        FMADD   y04, atemp4, a16, y04
        LFD     a16, 3 * SIZE(AO4)

        /* store the four finished y values just behind YY, preload the next four */
        STFD    y01, -4 * SIZE(YY)
        LFD     y01, 0 * SIZE(YY)
        STFD    y02, -3 * SIZE(YY)
        LFD     y02, 1 * SIZE(YY)
        STFD    y03, -2 * SIZE(YY)
        LFD     y03, 2 * SIZE(YY)
        STFD    y04, -1 * SIZE(YY)
        LFD     y04, 3 * SIZE(YY)
        bdnz    LL(12)                  /* decrement CTR, loop while it is non-zero */
        .align 4

/* ---- LL(14): runs only when (IS & 8) != 0; two unrolled groups of four ---- */
LL(14):
        andi.   r0, IS, 8
        ble     LL(15)                  /* result is 0 or 8, so this skips when the bit is clear */

        /* group 1: terms using xtemp1 / atemp1 */
        FMADD   xsum1, xtemp1, a1, xsum1
        NOP1
        FMADD   y01, atemp1, a1, y01
        LFD     a1, 4 * SIZE(AO1)
        FMADD   xsum2, xtemp1, a5, xsum2
        NOP1
        FMADD   y02, atemp1, a2, y02
        NOP2
        FMADD   xsum3, xtemp1, a9, xsum3
        NOP1
        FMADD   y03, atemp1, a3, y03
        NOP2
        FMADD   xsum4, xtemp1, a13, xsum4
        LFD     xtemp1, 4 * SIZE(XX)
        FMADD   y04, atemp1, a4, y04
        NOP2

        /* group 1: terms using xtemp2 / atemp2 */
        FMADD   xsum1, xtemp2, a2, xsum1
        LFD     a2, 5 * SIZE(AO1)
        FMADD   y01, atemp2, a5, y01
        LFD     a5, 4 * SIZE(AO2)
        FMADD   xsum2, xtemp2, a6, xsum2
        NOP1
        FMADD   y02, atemp2, a6, y02
        LFD     a6, 5 * SIZE(AO2)
        FMADD   xsum3, xtemp2, a10, xsum3
        NOP1
        FMADD   y03, atemp2, a7, y03
        NOP2
        FMADD   xsum4, xtemp2, a14, xsum4
        LFD     xtemp2, 5 * SIZE(XX)
        FMADD   y04, atemp2, a8, y04
        NOP2

        /* group 1: terms using xtemp3 / atemp3 */
        FMADD   xsum1, xtemp3, a3, xsum1
        LFD     a3, 6 * SIZE(AO1)
        FMADD   y01, atemp3, a9, y01
        LFD     a9, 4 * SIZE(AO3)
        FMADD   xsum2, xtemp3, a7, xsum2
        LFD     a7, 6 * SIZE(AO2)
        FMADD   y02, atemp3, a10, y02
        LFD     a10, 5 * SIZE(AO3)
        FMADD   xsum3, xtemp3, a11, xsum3
        NOP1
        FMADD   y03, atemp3, a11, y03
        LFD     a11, 6 * SIZE(AO3)
        FMADD   xsum4, xtemp3, a15, xsum4
        LFD     xtemp3, 6 * SIZE(XX)
        FMADD   y04, atemp3, a12, y04
        NOP2

        /* group 1: terms using xtemp4 / atemp4 */
        FMADD   xsum1, xtemp4, a4, xsum1
        LFD     a4, 7 * SIZE(AO1)
        FMADD   y01, atemp4, a13, y01
        LFD     a13, 4 * SIZE(AO4)
        FMADD   xsum2, xtemp4, a8, xsum2
        LFD     a8, 7 * SIZE(AO2)
        FMADD   y02, atemp4, a14, y02
        LFD     a14, 5 * SIZE(AO4)
        FMADD   xsum3, xtemp4, a12, xsum3
        LFD     a12, 7 * SIZE(AO3)
        FMADD   y03, atemp4, a15, y03
        LFD     a15, 6 * SIZE(AO4)
        FMADD   xsum4, xtemp4, a16, xsum4
        LFD     xtemp4, 7 * SIZE(XX)
        FMADD   y04, atemp4, a16, y04
        LFD     a16, 7 * SIZE(AO4)

        /* group 1 results go to YY[0..3]; YY[4..7] are loaded for group 2 */
        STFD    y01, 0 * SIZE(YY)
        LFD     y01, 4 * SIZE(YY)
        STFD    y02, 1 * SIZE(YY)
        LFD     y02, 5 * SIZE(YY)
        STFD    y03, 2 * SIZE(YY)
        LFD     y03, 6 * SIZE(YY)
        STFD    y04, 3 * SIZE(YY)
        LFD     y04, 7 * SIZE(YY)

        /* group 2: terms using xtemp1 / atemp1 */
        FMADD   xsum1, xtemp1, a1, xsum1
        NOP1
        FMADD   y01, atemp1, a1, y01
        LFD     a1, 8 * SIZE(AO1)
        FMADD   xsum2, xtemp1, a5, xsum2
        NOP1
        FMADD   y02, atemp1, a2, y02
        NOP2
        FMADD   xsum3, xtemp1, a9, xsum3
        NOP1
        FMADD   y03, atemp1, a3, y03
        NOP2
        FMADD   xsum4, xtemp1, a13, xsum4
        LFD     xtemp1, 8 * SIZE(XX)
        FMADD   y04, atemp1, a4, y04
        NOP2

        /* group 2: terms using xtemp2 / atemp2 */
        FMADD   xsum1, xtemp2, a2, xsum1
        LFD     a2, 9 * SIZE(AO1)
        FMADD   y01, atemp2, a5, y01
        LFD     a5, 8 * SIZE(AO2)
        FMADD   xsum2, xtemp2, a6, xsum2
        NOP1
        FMADD   y02, atemp2, a6, y02
        LFD     a6, 9 * SIZE(AO2)
        FMADD   xsum3, xtemp2, a10, xsum3
        NOP1
        FMADD   y03, atemp2, a7, y03
        NOP2
        FMADD   xsum4, xtemp2, a14, xsum4
        LFD     xtemp2, 9 * SIZE(XX)
        FMADD   y04, atemp2, a8, y04
        NOP2

        /* group 2: terms using xtemp3 / atemp3 */
        FMADD   xsum1, xtemp3, a3, xsum1
        LFD     a3, 10 * SIZE(AO1)
        FMADD   y01, atemp3, a9, y01
        LFD     a9, 8 * SIZE(AO3)
        FMADD   xsum2, xtemp3, a7, xsum2
        LFD     a7, 10 * SIZE(AO2)
        FMADD   y02, atemp3, a10, y02
        LFD     a10, 9 * SIZE(AO3)
        FMADD   xsum3, xtemp3, a11, xsum3
        NOP1
        FMADD   y03, atemp3, a11, y03
        LFD     a11, 10 * SIZE(AO3)
        FMADD   xsum4, xtemp3, a15, xsum4
        LFD     xtemp3, 10 * SIZE(XX)
        FMADD   y04, atemp3, a12, y04
        NOP2

        /* group 2: terms using xtemp4 / atemp4 */
        FMADD   xsum1, xtemp4, a4, xsum1
        LFD     a4, 11 * SIZE(AO1)
        FMADD   y01, atemp4, a13, y01
        LFD     a13, 8 * SIZE(AO4)
        FMADD   xsum2, xtemp4, a8, xsum2
        LFD     a8, 11 * SIZE(AO2)
        FMADD   y02, atemp4, a14, y02
        LFD     a14, 9 * SIZE(AO4)
        FMADD   xsum3, xtemp4, a12, xsum3
        LFD     a12, 11 * SIZE(AO3)
        FMADD   y03, atemp4, a15, y03
        LFD     a15, 10 * SIZE(AO4)
        FMADD   xsum4, xtemp4, a16, xsum4
        LFD     xtemp4, 11 * SIZE(XX)
        FMADD   y04, atemp4, a16, y04
        LFD     a16, 11 * SIZE(AO4)

        /* advance the four column pointers by the 8 elements just consumed */
        addi    AO1, AO1, 8 * SIZE
        addi    AO2, AO2, 8 * SIZE
        addi    AO3, AO3, 8 * SIZE
        addi    AO4, AO4, 8 * SIZE

        /* group 2 results go to YY[4..7]; YY[8..11] are preloaded */
        STFD    y01, 4 * SIZE(YY)
        LFD     y01, 8 * SIZE(YY)
        STFD    y02, 5 * SIZE(YY)
        LFD     y02, 9 * SIZE(YY)
        STFD    y03, 6 * SIZE(YY)
        LFD     y03, 10 * SIZE(YY)
        STFD    y04, 7 * SIZE(YY)
        LFD     y04, 11 * SIZE(YY)

        addi    XX, XX, 8 * SIZE
        addi    YY, YY, 8 * SIZE
        .align 4

/* ---- LL(15): runs only when (IS & 4) != 0; one unrolled group of four ---- */
LL(15):
        andi.   r0, IS, 4
        ble     LL(18)                  /* skips when the bit is clear */

        /* terms using xtemp1 / atemp1 */
        FMADD   xsum1, xtemp1, a1, xsum1
        NOP1
        FMADD   y01, atemp1, a1, y01
        LFD     a1, 4 * SIZE(AO1)
        FMADD   xsum2, xtemp1, a5, xsum2
        NOP1
        FMADD   y02, atemp1, a2, y02
        NOP2
        FMADD   xsum3, xtemp1, a9, xsum3
        NOP1
        FMADD   y03, atemp1, a3, y03
        NOP2
        FMADD   xsum4, xtemp1, a13, xsum4
        LFD     xtemp1, 4 * SIZE(XX)
        FMADD   y04, atemp1, a4, y04
        NOP2

        /* terms using xtemp2 / atemp2 */
        FMADD   xsum1, xtemp2, a2, xsum1
        LFD     a2, 5 * SIZE(AO1)
        FMADD   y01, atemp2, a5, y01
        LFD     a5, 4 * SIZE(AO2)
        FMADD   xsum2, xtemp2, a6, xsum2
        NOP1
        FMADD   y02, atemp2, a6, y02
        LFD     a6, 5 * SIZE(AO2)
        FMADD   xsum3, xtemp2, a10, xsum3
        NOP1
        FMADD   y03, atemp2, a7, y03
        NOP2
        FMADD   xsum4, xtemp2, a14, xsum4
        LFD     xtemp2, 5 * SIZE(XX)
        FMADD   y04, atemp2, a8, y04
        NOP2

        /* terms using xtemp3 / atemp3 */
        FMADD   xsum1, xtemp3, a3, xsum1
        LFD     a3, 6 * SIZE(AO1)
        FMADD   y01, atemp3, a9, y01
        LFD     a9, 4 * SIZE(AO3)
        FMADD   xsum2, xtemp3, a7, xsum2
        LFD     a7, 6 * SIZE(AO2)
        FMADD   y02, atemp3, a10, y02
        LFD     a10, 5 * SIZE(AO3)
        FMADD   xsum3, xtemp3, a11, xsum3
        NOP1
        FMADD   y03, atemp3, a11, y03
        LFD     a11, 6 * SIZE(AO3)
        FMADD   xsum4, xtemp3, a15, xsum4
        LFD     xtemp3, 6 * SIZE(XX)
        FMADD   y04, atemp3, a12, y04
        NOP2

        /* terms using xtemp4 / atemp4 */
        FMADD   xsum1, xtemp4, a4, xsum1
        LFD     a4, 7 * SIZE(AO1)
        FMADD   y01, atemp4, a13, y01
        LFD     a13, 4 * SIZE(AO4)
        FMADD   xsum2, xtemp4, a8, xsum2
        LFD     a8, 7 * SIZE(AO2)
        FMADD   y02, atemp4, a14, y02
        LFD     a14, 5 * SIZE(AO4)
        FMADD   xsum3, xtemp4, a12, xsum3
        LFD     a12, 7 * SIZE(AO3)
        FMADD   y03, atemp4, a15, y03
        LFD     a15, 6 * SIZE(AO4)
        FMADD   xsum4, xtemp4, a16, xsum4
        LFD     xtemp4, 7 * SIZE(XX)
        FMADD   y04, atemp4, a16, y04
        LFD     a16, 7 * SIZE(AO4)

        /* advance the four column pointers by the 4 elements just consumed */
        addi    AO1, AO1, 4 * SIZE
        addi    AO2, AO2, 4 * SIZE
        addi    AO3, AO3, 4 * SIZE
        addi    AO4, AO4, 4 * SIZE

        /* results go to YY[0..3]; YY[4..7] are preloaded for LL(18) */
        STFD    y01, 0 * SIZE(YY)
        LFD     y01, 4 * SIZE(YY)
        STFD    y02, 1 * SIZE(YY)
        LFD     y02, 5 * SIZE(YY)
        STFD    y03, 2 * SIZE(YY)
        LFD     y03, 6 * SIZE(YY)
        STFD    y04, 3 * SIZE(YY)
        LFD     y04, 7 * SIZE(YY)

        addi    XX, XX, 4 * SIZE
        addi    YY, YY, 4 * SIZE
        .align 4

/*
 * ---- LL(18): finish the current four-wide step ----
 * xsumN = ALPHA * xsumN, then the sixteen atempK * a.. terms are added, the
 * sums are added to y01..y04 and stored to YY[0..3].
 */
LL(18):
        LFD     xtemp1, ALPHA           /* xtemp1 is reused to hold ALPHA */

        FMUL    xsum1, xtemp1, xsum1
        FMUL    xsum2, xtemp1, xsum2
        FMUL    xsum3, xtemp1, xsum3
        FMUL    xsum4, xtemp1, xsum4

        FMADD   xsum1, atemp1, a1, xsum1
        FMADD   xsum2, atemp1, a5, xsum2
        FMADD   xsum3, atemp1, a9, xsum3
        FMADD   xsum4, atemp1, a13, xsum4

        FMADD   xsum1, atemp2, a5, xsum1
        FMADD   xsum2, atemp2, a6, xsum2
        FMADD   xsum3, atemp2, a10, xsum3
        FMADD   xsum4, atemp2, a14, xsum4

        FMADD   xsum1, atemp3, a9, xsum1
        FMADD   xsum2, atemp3, a10, xsum2
        FMADD   xsum3, atemp3, a11, xsum3
        FMADD   xsum4, atemp3, a15, xsum4

        FMADD   xsum1, atemp4, a13, xsum1
        FMADD   xsum2, atemp4, a14, xsum2
        FMADD   xsum3, atemp4, a15, xsum3
        FMADD   xsum4, atemp4, a16, xsum4

        FADD    y01, y01, xsum1
        FADD    y02, y02, xsum2
        FADD    y03, y03, xsum3
        FADD    y04, y04, xsum4

        STFD    y01, 0 * SIZE(YY)
        STFD    y02, 1 * SIZE(YY)
        STFD    y03, 2 * SIZE(YY)
        STFD    y04, 3 * SIZE(YY)

        addi    TEMP, IS, 8             /* TEMP = IS + 8, taken before IS is bumped */
        addi    IS, IS, 4
        cmpw    cr0, TEMP, M
        ble     LL(11)                  /* repeat the four-wide step while TEMP <= M (LL(11) is above this excerpt) */
        .align 4

/* ---- LL(20): two-wide step, runs only when (M & 2) != 0 ---- */
LL(20):
        andi.   TEMP, M, 2
        ble     LL(30)                  /* skips when the bit is clear */

        mr      AO1, A                  /* AO1 = A */
        add     AO2, A, LDA             /* AO2 = A + LDA */
        add     A, AO2, LDA             /* A moves past both columns */

        slwi    TEMP, IS, BASE_SHIFT    /* TEMP = X + (IS << BASE_SHIFT) */
        add     TEMP, X, TEMP

        LFD     atemp1, 0 * SIZE(TEMP)
        LFD     atemp2, 1 * SIZE(TEMP)

        LFD     a1, ALPHA               /* a1 temporarily holds ALPHA */

        FMUL    atemp1, a1, atemp1      /* atempK = ALPHA * element loaded via TEMP */
        FMUL    atemp2, a1, atemp2

        lfd     xsum1, FZERO            /* clear both running sums */
        fmr     xsum2, xsum1

        mr      XX, X
        mr      YY, NEW_Y

        LFD     xtemp1, 0 * SIZE(XX)
        LFD     xtemp2, 1 * SIZE(XX)

        LFD     y01, 0 * SIZE(YY)
        LFD     y02, 1 * SIZE(YY)

        LFD     a1, 0 * SIZE(AO1)
        LFD     a2, 1 * SIZE(AO1)
        LFD     a5, 0 * SIZE(AO2)
        LFD     a6, 1 * SIZE(AO2)

        srawi.  r0, IS, 1               /* trip count = IS >> 1 */
        mtspr   CTR, r0
        ble     LL(28)                  /* no iterations when the count is <= 0 */
        .align 4

/* two elements of XX / YY / AO1 / AO2 per iteration */
LL(22):
        FMADD   xsum1, xtemp1, a1, xsum1
        FMADD   xsum2, xtemp1, a5, xsum2

        FMADD   xsum1, xtemp2, a2, xsum1
        FMADD   xsum2, xtemp2, a6, xsum2

        FMADD   y01, atemp1, a1, y01
        FMADD   y02, atemp1, a2, y02
        FMADD   y01, atemp2, a5, y01
        FMADD   y02, atemp2, a6, y02

        LFD     xtemp1, 2 * SIZE(XX)
        LFD     xtemp2, 3 * SIZE(XX)

        LFD     a1, 2 * SIZE(AO1)
        LFD     a2, 3 * SIZE(AO1)
        LFD     a5, 2 * SIZE(AO2)
        LFD     a6, 3 * SIZE(AO2)

        STFD    y01, 0 * SIZE(YY)
        STFD    y02, 1 * SIZE(YY)

        LFD     y01, 2 * SIZE(YY)
        LFD     y02, 3 * SIZE(YY)

        addi    AO1, AO1, 2 * SIZE
        addi    AO2, AO2, 2 * SIZE

        addi    XX, XX, 2 * SIZE
        addi    YY, YY, 2 * SIZE

        bdnz    LL(22)
        .align 4

/* finish the two-wide step: scale the sums by ALPHA, add the atemp terms, update YY[0..1] */
LL(28):
        LFD     xtemp1, ALPHA

        FMUL    xsum1, xtemp1, xsum1
        FMUL    xsum2, xtemp1, xsum2

        FMADD   xsum1, atemp1, a1, xsum1
        FMADD   xsum2, atemp1, a5, xsum2
        FMADD   xsum1, atemp2, a5, xsum1
        FMADD   xsum2, atemp2, a6, xsum2

        FADD    y01, y01, xsum1
        FADD    y02, y02, xsum2

        STFD    y01, 0 * SIZE(YY)
        STFD    y02, 1 * SIZE(YY)

        addi    IS, IS, 2
        .align 4

/* ---- LL(30): one-wide step, runs only when (M & 1) != 0 ---- */
LL(30):
        andi.   TEMP, M, 1
        ble     LL(990)                 /* skips when the bit is clear */

        mr      AO1, A

        slwi    TEMP, IS, BASE_SHIFT    /* TEMP = X + (IS << BASE_SHIFT) */
        add     TEMP, X, TEMP

        LFD     atemp1, 0 * SIZE(TEMP)

        LFD     a1, ALPHA               /* a1 temporarily holds ALPHA */

        FMUL    atemp1, a1, atemp1

        lfd     xsum1, FZERO

        mr      XX, X
        mr      YY, NEW_Y

        LFD     xtemp1, 0 * SIZE(XX)
        LFD     y01, 0 * SIZE(YY)

        LFD     a1, 0 * SIZE(AO1)

        mtspr   CTR, IS                 /* trip count = IS */
        cmpwi   cr0, IS, 0
        ble     LL(38)                  /* no iterations when IS <= 0 */
        .align 4

/* one element of XX / YY / AO1 per iteration */
LL(32):
        FMADD   xsum1, xtemp1, a1, xsum1

        FMADD   y01, atemp1, a1, y01

        LFD     xtemp1, 1 * SIZE(XX)

        LFD     a1, 1 * SIZE(AO1)

        STFD    y01, 0 * SIZE(YY)

        LFD     y01, 1 * SIZE(YY)

        addi    AO1, AO1, 1 * SIZE
        addi    XX, XX, 1 * SIZE
        addi    YY, YY, 1 * SIZE

        bdnz    LL(32)
        .align 4

/* finish the one-wide step */
LL(38):
        LFD     xtemp1, ALPHA

        FMUL    xsum1, xtemp1, xsum1

        FMADD   xsum1, atemp1, a1, xsum1

        FADD    y01, y01, xsum1

        STFD    y01, 0 * SIZE(YY)
        .align 4

/*
 * ---- LL(990): add NEW_Y into Y when INCY differs from SIZE ----
 * Y is read with stride INCY, NEW_Y is read with unit stride, and the sums
 * are written through YY (a copy of the original Y pointer) with stride INCY.
 * NOTE(review): NEW_Y looks like a contiguous work buffer set up earlier in
 * the routine -- confirm against the code above this excerpt.
 */
LL(990):
        cmpwi   cr0, INCY, SIZE
        beq     LL(999)                 /* nothing to do when INCY == SIZE */

        mr      YY, Y

        srawi.  r0, M, 3                /* trip count = M >> 3 (eight elements per pass) */
        mtspr   CTR, r0
        ble     LL(995)
        .align 4

LL(991):
        LFD     f0, 0 * SIZE(Y)
        add     Y, Y, INCY
        LFD     f1, 0 * SIZE(Y)
        add     Y, Y, INCY
        LFD     f2, 0 * SIZE(Y)
        add     Y, Y, INCY
        LFD     f3, 0 * SIZE(Y)
        add     Y, Y, INCY
        LFD     f4, 0 * SIZE(Y)
        add     Y, Y, INCY
        LFD     f5, 0 * SIZE(Y)
        add     Y, Y, INCY
        LFD     f6, 0 * SIZE(Y)
        add     Y, Y, INCY
        LFD     f7, 0 * SIZE(Y)
        add     Y, Y, INCY

        /* f14 and f15 are overwritten here; they are reloaded from the stack at LL(999) */
        LFD     f8, 0 * SIZE(NEW_Y)
        LFD     f9, 1 * SIZE(NEW_Y)
        LFD     f10, 2 * SIZE(NEW_Y)
        LFD     f11, 3 * SIZE(NEW_Y)
        LFD     f12, 4 * SIZE(NEW_Y)
        LFD     f13, 5 * SIZE(NEW_Y)
        LFD     f14, 6 * SIZE(NEW_Y)
        LFD     f15, 7 * SIZE(NEW_Y)
        addi    NEW_Y, NEW_Y, 8 * SIZE

        FADD    f8, f8, f0
        FADD    f9, f9, f1
        FADD    f10, f10, f2
        FADD    f11, f11, f3
        FADD    f12, f12, f4
        FADD    f13, f13, f5
        FADD    f14, f14, f6
        FADD    f15, f15, f7

        STFD    f8, 0 * SIZE(YY)
        add     YY, YY, INCY
        STFD    f9, 0 * SIZE(YY)
        add     YY, YY, INCY
        STFD    f10, 0 * SIZE(YY)
        add     YY, YY, INCY
        STFD    f11, 0 * SIZE(YY)
        add     YY, YY, INCY
        STFD    f12, 0 * SIZE(YY)
        add     YY, YY, INCY
        STFD    f13, 0 * SIZE(YY)
        add     YY, YY, INCY
        STFD    f14, 0 * SIZE(YY)
        add     YY, YY, INCY
        STFD    f15, 0 * SIZE(YY)
        add     YY, YY, INCY
        bdnz    LL(991)
        .align 4

/* remainder: four elements when (M & 4) != 0 */
LL(995):
        andi.   J, M, 4
        ble     LL(996)

        LFD     f0, 0 * SIZE(Y)
        add     Y, Y, INCY
        LFD     f1, 0 * SIZE(Y)
        add     Y, Y, INCY
        LFD     f2, 0 * SIZE(Y)
        add     Y, Y, INCY
        LFD     f3, 0 * SIZE(Y)
        add     Y, Y, INCY

        LFD     f8, 0 * SIZE(NEW_Y)
        LFD     f9, 1 * SIZE(NEW_Y)
        LFD     f10, 2 * SIZE(NEW_Y)
        LFD     f11, 3 * SIZE(NEW_Y)
        addi    NEW_Y, NEW_Y, 4 * SIZE

        FADD    f8, f8, f0
        FADD    f9, f9, f1
        FADD    f10, f10, f2
        FADD    f11, f11, f3

        STFD    f8, 0 * SIZE(YY)
        add     YY, YY, INCY
        STFD    f9, 0 * SIZE(YY)
        add     YY, YY, INCY
        STFD    f10, 0 * SIZE(YY)
        add     YY, YY, INCY
        STFD    f11, 0 * SIZE(YY)
        add     YY, YY, INCY
        .align 4

/* remainder: two elements when (M & 2) != 0 */
LL(996):
        andi.   J, M, 2
        ble     LL(997)

        LFD     f0, 0 * SIZE(Y)
        add     Y, Y, INCY
        LFD     f1, 0 * SIZE(Y)
        add     Y, Y, INCY

        LFD     f8, 0 * SIZE(NEW_Y)
        LFD     f9, 1 * SIZE(NEW_Y)
        addi    NEW_Y, NEW_Y, 2 * SIZE

        FADD    f8, f8, f0
        FADD    f9, f9, f1

        STFD    f8, 0 * SIZE(YY)
        add     YY, YY, INCY
        STFD    f9, 0 * SIZE(YY)
        add     YY, YY, INCY
        .align 4

/* remainder: last element when (M & 1) != 0 */
LL(997):
        andi.   J, M, 1
        ble     LL(999)

        LFD     f0, 0 * SIZE(Y)
        LFD     f8, 0 * SIZE(NEW_Y)

        FADD    f8, f8, f0

        STFD    f8, 0 * SIZE(YY)
        .align 4

/*
 * ---- LL(999): common exit ----
 * r3 is set to 0 (presumably the return value), f14-f31 and r14-r27 are
 * reloaded from the frame at SP, the frame of STACKSIZE bytes is released
 * and the routine returns.
 */
LL(999):
        li      r3, 0

        lfd     f14, 0(SP)
        lfd     f15, 8(SP)
        lfd     f16, 16(SP)
        lfd     f17, 24(SP)
        lfd     f18, 32(SP)
        lfd     f19, 40(SP)
        lfd     f20, 48(SP)
        lfd     f21, 56(SP)
        lfd     f22, 64(SP)
        lfd     f23, 72(SP)
        lfd     f24, 80(SP)
        lfd     f25, 88(SP)
        lfd     f26, 96(SP)
        lfd     f27, 104(SP)
        lfd     f28, 112(SP)
        lfd     f29, 120(SP)
        lfd     f30, 128(SP)
        lfd     f31, 136(SP)

/* integer registers occupy 8-byte slots in the 64-bit build, 4-byte slots otherwise */
#ifdef __64BIT__
        ld      r14, 144(SP)
        ld      r15, 152(SP)
        ld      r16, 160(SP)
        ld      r17, 168(SP)
        ld      r18, 176(SP)
        ld      r19, 184(SP)
        ld      r20, 192(SP)
        ld      r21, 200(SP)
        ld      r22, 208(SP)
        ld      r23, 216(SP)
        ld      r24, 224(SP)
        ld      r25, 232(SP)
        ld      r26, 240(SP)
        ld      r27, 248(SP)
#else
        lwz     r14, 144(SP)
        lwz     r15, 148(SP)
        lwz     r16, 152(SP)
        lwz     r17, 156(SP)
        lwz     r18, 160(SP)
        lwz     r19, 164(SP)
        lwz     r20, 168(SP)
        lwz     r21, 172(SP)
        lwz     r22, 176(SP)
        lwz     r23, 180(SP)
        lwz     r24, 184(SP)
        lwz     r25, 188(SP)
        lwz     r26, 192(SP)
        lwz     r27, 196(SP)
#endif

        addi    SP, SP, STACKSIZE
        blr

        EPILOGUE
/* closes a conditional opened above this excerpt */
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -