📄 zsymv_l.s
字号:
/*
 * Tail of the kernel body.  The function entry and every label before
 * LL(15) lie outside this excerpt, so only the visible instructions are
 * described here.
 *
 * Layout note: one statement per line, with labels and preprocessor
 * directives on lines of their own.  A '#' directive is only recognised
 * when it starts a line, so the HEMV and __64BIT__ conditionals below
 * depend on this layout.  Instruction order is unchanged.
 *
 * NOP1 / NOP2 and the upper-case FP mnemonics (FMADD, FMADD1, FMADD2,
 * FNMSUB, LFD, STFD, ...) are macros defined elsewhere.
 * NOTE(review): NOP1/NOP2 look like scheduling padding - confirm.
 */

/* End of the preceding unrolled section: last updates of xsum3/xsum4 and
   y07/y08, then y05..y08 are stored to the four slots just below YY. */
	FMADD1 xsum3, xtemp4, a8, xsum3
	LFD xtemp4, 3 * SIZE(XX)
	FNMSUB y07, atemp4, a8, y07
	NOP2

	FMADD2 xsum4, xtemp3, a8, xsum4
	LFD xtemp3, 2 * SIZE(XX)
	FMADD y08, atemp3, a8, y08
	LFD a8, 3 * SIZE(AO2)

	STFD y05, -4 * SIZE(YY)
	STFD y06, -3 * SIZE(YY)
	STFD y07, -2 * SIZE(YY)
	STFD y08, -1 * SIZE(YY)
	.align 4

/* Remainder step taken when (TEMP & 4) is non-zero.  Two groups: the first
   updates y01..y04, the second y05..y08.  AO1, AO2, XX and YY each advance
   by 8 * SIZE; loads for the next step are interleaved with the arithmetic. */
LL(15):
	andi. r0, TEMP, 4
	ble LL(16)

	FMADD xsum1, xtemp1, a1, xsum1
	NOP1
	FMADD y01, atemp1, a1, y01
	NOP2

	FMADD xsum2, xtemp2, a1, xsum2
	NOP1
	FMADD y02, atemp2, a1, y02
	LFD a1, 4 * SIZE(AO1)

	FMADD xsum3, xtemp1, a5, xsum3
	NOP1
	FMADD y03, atemp1, a3, y03
	NOP2

	FMADD xsum4, xtemp2, a5, xsum4
	NOP1
	FMADD y04, atemp2, a3, y04
	NOP2

	FMADD1 xsum1, xtemp2, a2, xsum1
	LFD y05, 4 * SIZE(YY)
	FNMSUB y01, atemp2, a2, y01
	NOP2

	FMADD2 xsum2, xtemp1, a2, xsum2
	LFD y06, 5 * SIZE(YY)
	FMADD y02, atemp1, a2, y02
	LFD a2, 5 * SIZE(AO1)

	FMADD1 xsum3, xtemp2, a6, xsum3
	LFD xtemp2, 5 * SIZE(XX)
	FNMSUB y03, atemp2, a4, y03
	NOP2

	FMADD2 xsum4, xtemp1, a6, xsum4
	LFD xtemp1, 4 * SIZE(XX)
	FMADD y04, atemp1, a4, y04
	NOP2

	FMADD xsum1, xtemp3, a3, xsum1
	LFD y07, 6 * SIZE(YY)
	FMADD y01, atemp3, a5, y01
	NOP2

	FMADD xsum2, xtemp4, a3, xsum2
	LFD a3, 6 * SIZE(AO1)
	FMADD y02, atemp4, a5, y02
	LFD a5, 4 * SIZE(AO2)

	FMADD xsum3, xtemp3, a7, xsum3
	LFD y08, 7 * SIZE(YY)
	FMADD y03, atemp3, a7, y03
	NOP2

	FMADD xsum4, xtemp4, a7, xsum4
	NOP1
	FMADD y04, atemp4, a7, y04
	LFD a7, 6 * SIZE(AO2)

	FMADD1 xsum1, xtemp4, a4, xsum1
	NOP1
	FNMSUB y01, atemp4, a6, y01
	NOP2

	FMADD2 xsum2, xtemp3, a4, xsum2
	LFD a4, 7 * SIZE(AO1)
	FMADD y02, atemp3, a6, y02
	LFD a6, 5 * SIZE(AO2)

	FMADD1 xsum3, xtemp4, a8, xsum3
	LFD xtemp4, 7 * SIZE(XX)
	FNMSUB y03, atemp4, a8, y03
	NOP2

	FMADD2 xsum4, xtemp3, a8, xsum4
	LFD xtemp3, 6 * SIZE(XX)
	FMADD y04, atemp3, a8, y04
	LFD a8, 7 * SIZE(AO2)

	/* Second group: y01..y04 are written back while y05..y08 accumulate. */
	FMADD xsum1, xtemp1, a1, xsum1
	STFD y01, 0 * SIZE(YY)
	FMADD y05, atemp1, a1, y05
	NOP2

	FMADD xsum2, xtemp2, a1, xsum2
	STFD y02, 1 * SIZE(YY)
	FMADD y06, atemp2, a1, y06
	LFD a1, 8 * SIZE(AO1)

	FMADD xsum3, xtemp1, a5, xsum3
	STFD y03, 2 * SIZE(YY)
	FMADD y07, atemp1, a3, y07
	NOP2

	FMADD xsum4, xtemp2, a5, xsum4
	STFD y04, 3 * SIZE(YY)
	FMADD y08, atemp2, a3, y08
	NOP2

	FMADD1 xsum1, xtemp2, a2, xsum1
	LFD y01, 8 * SIZE(YY)
	FNMSUB y05, atemp2, a2, y05
	NOP2

	FMADD2 xsum2, xtemp1, a2, xsum2
	LFD y02, 9 * SIZE(YY)
	FMADD y06, atemp1, a2, y06
	LFD a2, 9 * SIZE(AO1)

	FMADD1 xsum3, xtemp2, a6, xsum3
	LFD xtemp2, 9 * SIZE(XX)
	FNMSUB y07, atemp2, a4, y07
	NOP2

	FMADD2 xsum4, xtemp1, a6, xsum4
	LFD xtemp1, 8 * SIZE(XX)
	FMADD y08, atemp1, a4, y08
	NOP2

	FMADD xsum1, xtemp3, a3, xsum1
	LFD y03, 10 * SIZE(YY)
	FMADD y05, atemp3, a5, y05
	NOP2

	FMADD xsum2, xtemp4, a3, xsum2
	LFD a3, 10 * SIZE(AO1)
	FMADD y06, atemp4, a5, y06
	LFD a5, 8 * SIZE(AO2)

	FMADD xsum3, xtemp3, a7, xsum3
	LFD y04, 11 * SIZE(YY)
	FMADD y07, atemp3, a7, y07
	NOP2

	/* Pointer bumps are interleaved below; offsets used after each bump
	   are relative to the already-advanced pointer. */
	FMADD xsum4, xtemp4, a7, xsum4
	addi YY, YY, 8 * SIZE
	FMADD y08, atemp4, a7, y08
	LFD a7, 10 * SIZE(AO2)

	FMADD1 xsum1, xtemp4, a4, xsum1
	addi AO2, AO2, 8 * SIZE
	FNMSUB y05, atemp4, a6, y05
	addi XX, XX, 8 * SIZE

	FMADD2 xsum2, xtemp3, a4, xsum2
	LFD a4, 11 * SIZE(AO1)
	FMADD y06, atemp3, a6, y06
	LFD a6, 1 * SIZE(AO2)

	FMADD1 xsum3, xtemp4, a8, xsum3
	LFD xtemp4, 3 * SIZE(XX)
	FNMSUB y07, atemp4, a8, y07
	addi AO1, AO1, 8 * SIZE

	FMADD2 xsum4, xtemp3, a8, xsum4
	LFD xtemp3, 2 * SIZE(XX)
	FMADD y08, atemp3, a8, y08
	LFD a8, 3 * SIZE(AO2)

	STFD y05, -4 * SIZE(YY)
	STFD y06, -3 * SIZE(YY)
	STFD y07, -2 * SIZE(YY)
	STFD y08, -1 * SIZE(YY)
	.align 4

/* Remainder step taken when (TEMP & 2) is non-zero.  Updates y01..y04,
   advances AO1, AO2 and YY by 4 * SIZE (XX is not advanced in this step),
   stores y01..y04 below the new YY and reloads y01/y02 from it. */
LL(16):
	andi. r0, TEMP, 2
	ble LL(17)

	FMADD xsum1, xtemp1, a1, xsum1
	NOP1
	FMADD y01, atemp1, a1, y01
	NOP2

	FMADD xsum2, xtemp2, a1, xsum2
	NOP1
	FMADD y02, atemp2, a1, y02
	LFD a1, 4 * SIZE(AO1)

	FMADD xsum3, xtemp1, a5, xsum3
	FMADD y03, atemp1, a3, y03
	FMADD xsum4, xtemp2, a5, xsum4
	FMADD y04, atemp2, a3, y04

	FMADD1 xsum1, xtemp2, a2, xsum1
	NOP1
	FNMSUB y01, atemp2, a2, y01
	NOP2

	FMADD2 xsum2, xtemp1, a2, xsum2
	NOP1
	FMADD y02, atemp1, a2, y02
	LFD a2, 5 * SIZE(AO1)

	FMADD1 xsum3, xtemp2, a6, xsum3
	LFD xtemp2, 5 * SIZE(XX)
	FNMSUB y03, atemp2, a4, y03
	NOP2

	FMADD2 xsum4, xtemp1, a6, xsum4
	LFD xtemp1, 4 * SIZE(XX)
	FMADD y04, atemp1, a4, y04
	NOP2

	FMADD xsum1, xtemp3, a3, xsum1
	NOP1
	FMADD y01, atemp3, a5, y01
	NOP2

	FMADD xsum2, xtemp4, a3, xsum2
	NOP1
	FMADD y02, atemp4, a5, y02
	LFD a5, 4 * SIZE(AO2)

	FMADD xsum3, xtemp3, a7, xsum3
	FMADD y03, atemp3, a7, y03
	FMADD xsum4, xtemp4, a7, xsum4
	FMADD y04, atemp4, a7, y04

	FMADD1 xsum1, xtemp4, a4, xsum1
	NOP1
	FNMSUB y01, atemp4, a6, y01
	NOP2

	FMADD2 xsum2, xtemp3, a4, xsum2
	NOP1
	FMADD y02, atemp3, a6, y02
	LFD a6, 5 * SIZE(AO2)

	FMADD1 xsum3, xtemp4, a8, xsum3
	addi AO1, AO1, 4 * SIZE
	FNMSUB y03, atemp4, a8, y03
	addi AO2, AO2, 4 * SIZE

	FMADD2 xsum4, xtemp3, a8, xsum4
	addi YY, YY, 4 * SIZE
	FMADD y04, atemp3, a8, y04
	NOP2

	STFD y01, -4 * SIZE(YY)
	LFD y01, 0 * SIZE(YY)
	STFD y02, -3 * SIZE(YY)
	LFD y02, 1 * SIZE(YY)

	STFD y03, -2 * SIZE(YY)
	STFD y04, -1 * SIZE(YY)
	.align 4

/* Remainder step taken when (M & 1) is non-zero.  Only y01/y02 are updated
   here; y01..y04 are then all stored at YY. */
LL(17):
	andi. r0, M, 1
	ble LL(18)

	FMADD xsum1, xtemp1, a1, xsum1
	FMADD y01, atemp1, a1, y01
	FMADD xsum2, xtemp2, a1, xsum2
	FMADD y02, atemp2, a1, y02

	FMADD xsum3, xtemp1, a5, xsum3
	FNMSUB y01, atemp2, a2, y01
	FMADD xsum4, xtemp2, a5, xsum4
	FMADD y02, atemp1, a2, y02

	FMADD1 xsum1, xtemp2, a2, xsum1
	FMADD y01, atemp3, a5, y01
	FMADD2 xsum2, xtemp1, a2, xsum2
	FMADD y02, atemp4, a5, y02

	FMADD1 xsum3, xtemp2, a6, xsum3
	FNMSUB y01, atemp4, a6, y01
	FMADD2 xsum4, xtemp1, a6, xsum4
	FMADD y02, atemp3, a6, y02

	STFD y01, 0 * SIZE(YY)
	STFD y02, 1 * SIZE(YY)
	STFD y03, 2 * SIZE(YY)
	STFD y04, 3 * SIZE(YY)
	.align 4

/* Fold the accumulated sums into NEW_Y at offset (IS << ZBASE_SHIFT).
   (xsum1, xsum2) and (xsum3, xsum4) are each combined with
   (ALPHA_R, ALPHA_I) and added to four consecutive values of NEW_Y.
   NOTE(review): if FNMSUB d,a,c,b expands to the PowerPC fnmsub
   (d = b - a*c), each pair is a complex multiply by alpha - confirm
   against the macro definition. */
LL(18):
	LFD y05, ALPHA_R
	LFD y06, ALPHA_I

	slwi TEMP, IS, ZBASE_SHIFT
	add YY, NEW_Y, TEMP

	LFD y01, 0 * SIZE(YY)
	LFD y02, 1 * SIZE(YY)
	LFD y03, 2 * SIZE(YY)
	LFD y04, 3 * SIZE(YY)

	FMUL xtemp1, y05, xsum1
	FMUL xtemp2, y06, xsum1
	FMUL xtemp3, y05, xsum3
	FMUL xtemp4, y06, xsum3

	FNMSUB xsum1, y06, xsum2, xtemp1
	FMADD xsum2, y05, xsum2, xtemp2
	FNMSUB xsum3, y06, xsum4, xtemp3
	FMADD xsum4, y05, xsum4, xtemp4

	FADD y01, y01, xsum1
	FADD y02, y02, xsum2
	FADD y03, y03, xsum3
	FADD y04, y04, xsum4

	/* TEMP = IS + 4 and IS += 2; branch back to LL(11) while TEMP <= M. */
	STFD y01, 0 * SIZE(YY)
	addi TEMP, IS, 4
	STFD y02, 1 * SIZE(YY)
	addi IS, IS, 2

	STFD y03, 2 * SIZE(YY)
	cmpw cr0, TEMP, M
	STFD y04, 3 * SIZE(YY)
	ble LL(11)
	.align 4

/* Taken when (M & 1) is non-zero: handle the single entry at index IS.
   The two values at 0/1 * SIZE(A) are multiplied by the two values at XX;
   when HEMV is defined the a2 terms are left out.  The result is combined
   with (ALPHA_R, ALPHA_I) and added to NEW_Y at the same offset.
   atemp1/atemp2 are overwritten in the middle of this block but are not
   referenced by name again below, and ALPHA_R/ALPHA_I are loaded twice;
   both are kept as in the original. */
LL(20):
	andi. TEMP, M, 1
	ble LL(990)

	slwi TEMP, IS, ZBASE_SHIFT
	add XX, X, TEMP
	add YY, NEW_Y, TEMP

	LFD y05, ALPHA_R
	LFD y06, ALPHA_I

	LFD atemp1, 0 * SIZE(XX)
	LFD atemp2, 1 * SIZE(XX)

	LFD a1, 0 * SIZE(A)
	LFD a2, 1 * SIZE(A)

	FMUL xsum1, atemp1, a1
	FMUL xsum2, atemp2, a1

#ifndef HEMV
	FNMSUB xsum1, atemp2, a2, xsum1
	FMADD xsum2, atemp1, a2, xsum2
#endif

	FMUL xtemp1, y05, atemp1
	FMUL xtemp2, y06, atemp1

	FNMSUB atemp1, y06, atemp2, xtemp1
	FMADD atemp2, y05, atemp2, xtemp2

	LFD y05, ALPHA_R
	LFD y06, ALPHA_I

	LFD y01, 0 * SIZE(YY)
	LFD y02, 1 * SIZE(YY)

	FMUL xtemp1, y05, xsum1
	FMUL xtemp2, y06, xsum1

	FNMSUB xsum1, y06, xsum2, xtemp1
	FMADD xsum2, y05, xsum2, xtemp2

	FADD y01, y01, xsum1
	FADD y02, y02, xsum2

	STFD y01, 0 * SIZE(YY)
	STFD y02, 1 * SIZE(YY)
	.align 4

/* If INCY equals 2 * SIZE, skip straight to the exit at LL(999).
   Otherwise walk Y with stride INCY, adding the contiguous values from
   NEW_Y and storing the sums back through YY (a copy of the initial Y).
   CTR = M >> 2; the loop is skipped when that count is <= 0. */
LL(990):
	cmpwi cr0, INCY, 2 * SIZE
	beq LL(999)

	mr YY, Y

	srawi. r0, M, 2
	mtspr CTR, r0
	ble LL(995)
	.align 4

/* Main write-back loop: four pairs from Y plus eight values from NEW_Y
   per iteration. */
LL(991):
	LFD f0, 0 * SIZE(Y)
	LFD f1, 1 * SIZE(Y)
	add Y, Y, INCY
	LFD f2, 0 * SIZE(Y)
	LFD f3, 1 * SIZE(Y)
	add Y, Y, INCY
	LFD f4, 0 * SIZE(Y)
	LFD f5, 1 * SIZE(Y)
	add Y, Y, INCY
	LFD f6, 0 * SIZE(Y)
	LFD f7, 1 * SIZE(Y)
	add Y, Y, INCY

	LFD f8, 0 * SIZE(NEW_Y)
	LFD f9, 1 * SIZE(NEW_Y)
	LFD f10, 2 * SIZE(NEW_Y)
	LFD f11, 3 * SIZE(NEW_Y)
	LFD f12, 4 * SIZE(NEW_Y)
	LFD f13, 5 * SIZE(NEW_Y)
	LFD f14, 6 * SIZE(NEW_Y)
	LFD f15, 7 * SIZE(NEW_Y)
	addi NEW_Y, NEW_Y, 8 * SIZE

	FADD f8, f8, f0
	FADD f9, f9, f1
	FADD f10, f10, f2
	FADD f11, f11, f3
	FADD f12, f12, f4
	FADD f13, f13, f5
	FADD f14, f14, f6
	FADD f15, f15, f7

	STFD f8, 0 * SIZE(YY)
	STFD f9, 1 * SIZE(YY)
	add YY, YY, INCY
	STFD f10, 0 * SIZE(YY)
	STFD f11, 1 * SIZE(YY)
	add YY, YY, INCY
	STFD f12, 0 * SIZE(YY)
	STFD f13, 1 * SIZE(YY)
	add YY, YY, INCY
	STFD f14, 0 * SIZE(YY)
	STFD f15, 1 * SIZE(YY)
	add YY, YY, INCY
	bdnz LL(991)
	.align 4

/* Write-back remainder when (M & 2) is non-zero: two pairs. */
LL(995):
	andi. J, M, 2
	ble LL(996)

	LFD f0, 0 * SIZE(Y)
	LFD f1, 1 * SIZE(Y)
	add Y, Y, INCY
	LFD f2, 0 * SIZE(Y)
	LFD f3, 1 * SIZE(Y)
	add Y, Y, INCY

	LFD f8, 0 * SIZE(NEW_Y)
	LFD f9, 1 * SIZE(NEW_Y)
	LFD f10, 2 * SIZE(NEW_Y)
	LFD f11, 3 * SIZE(NEW_Y)
	addi NEW_Y, NEW_Y, 4 * SIZE

	FADD f8, f8, f0
	FADD f9, f9, f1
	FADD f10, f10, f2
	FADD f11, f11, f3

	STFD f8, 0 * SIZE(YY)
	STFD f9, 1 * SIZE(YY)
	add YY, YY, INCY
	STFD f10, 0 * SIZE(YY)
	STFD f11, 1 * SIZE(YY)
	add YY, YY, INCY
	.align 4

/* Write-back remainder when (M & 1) is non-zero: one pair. */
LL(996):
	andi. J, M, 1
	ble LL(999)

	LFD f0, 0 * SIZE(Y)
	LFD f1, 1 * SIZE(Y)

	LFD f8, 0 * SIZE(NEW_Y)
	LFD f9, 1 * SIZE(NEW_Y)

	FADD f8, f8, f0
	FADD f9, f9, f1

	STFD f8, 0 * SIZE(YY)
	STFD f9, 1 * SIZE(YY)
	.align 4

/* Exit: r3 = 0, reload f14..f31 and r14..r27 from the frame at SP, release
   STACKSIZE bytes and return.  The integer registers are reloaded with ld
   from 8-byte slots when __64BIT__ is defined, otherwise with lwz from
   4-byte slots. */
LL(999):
	li r3, 0

	lfd f14, 0(SP)
	lfd f15, 8(SP)
	lfd f16, 16(SP)
	lfd f17, 24(SP)
	lfd f18, 32(SP)
	lfd f19, 40(SP)
	lfd f20, 48(SP)
	lfd f21, 56(SP)
	lfd f22, 64(SP)
	lfd f23, 72(SP)
	lfd f24, 80(SP)
	lfd f25, 88(SP)
	lfd f26, 96(SP)
	lfd f27, 104(SP)
	lfd f28, 112(SP)
	lfd f29, 120(SP)
	lfd f30, 128(SP)
	lfd f31, 136(SP)

#ifdef __64BIT__
	ld r14, 144(SP)
	ld r15, 152(SP)
	ld r16, 160(SP)
	ld r17, 168(SP)
	ld r18, 176(SP)
	ld r19, 184(SP)
	ld r20, 192(SP)
	ld r21, 200(SP)
	ld r22, 208(SP)
	ld r23, 216(SP)
	ld r24, 224(SP)
	ld r25, 232(SP)
	ld r26, 240(SP)
	ld r27, 248(SP)
#else
	lwz r14, 144(SP)
	lwz r15, 148(SP)
	lwz r16, 152(SP)
	lwz r17, 156(SP)
	lwz r18, 160(SP)
	lwz r19, 164(SP)
	lwz r20, 168(SP)
	lwz r21, 172(SP)
	lwz r22, 176(SP)
	lwz r23, 180(SP)
	lwz r24, 184(SP)
	lwz r25, 188(SP)
	lwz r26, 192(SP)
	lwz r27, 196(SP)
#endif

	addi SP, SP, STACKSIZE
	blr

	EPILOGUE
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -