📄 zsymv_u.s
字号:
FMADD xsum1, xtemp1, a1, xsum1 NOP1 FMADD y01, atemp1, a1, y01 NOP2 FMADD xsum2, xtemp2, a1, xsum2 NOP1 FMADD y02, atemp2, a1, y02 LFD a1, 4 * SIZE(AO1) FMADD xsum3, xtemp1, a5, xsum3 NOP1 FMADD y03, atemp1, a3, y03 NOP2 FMADD xsum4, xtemp2, a5, xsum4 NOP1 FMADD y04, atemp2, a3, y04 NOP2 FMADD1 xsum1, xtemp2, a2, xsum1 LFD y05, 4 * SIZE(YY) FNMSUB y01, atemp2, a2, y01 NOP2 FMADD2 xsum2, xtemp1, a2, xsum2 LFD y06, 5 * SIZE(YY) FMADD y02, atemp1, a2, y02 LFD a2, 5 * SIZE(AO1) FMADD1 xsum3, xtemp2, a6, xsum3 LFD xtemp2, 5 * SIZE(XX) FNMSUB y03, atemp2, a4, y03 NOP2 FMADD2 xsum4, xtemp1, a6, xsum4 LFD xtemp1, 4 * SIZE(XX) FMADD y04, atemp1, a4, y04 NOP2 FMADD xsum1, xtemp3, a3, xsum1 LFD y07, 6 * SIZE(YY) FMADD y01, atemp3, a5, y01 NOP2 FMADD xsum2, xtemp4, a3, xsum2 LFD a3, 6 * SIZE(AO1) FMADD y02, atemp4, a5, y02 LFD a5, 4 * SIZE(AO2) FMADD xsum3, xtemp3, a7, xsum3 LFD y08, 7 * SIZE(YY) FMADD y03, atemp3, a7, y03 NOP2 FMADD xsum4, xtemp4, a7, xsum4 NOP1 FMADD y04, atemp4, a7, y04 LFD a7, 6 * SIZE(AO2) FMADD1 xsum1, xtemp4, a4, xsum1 NOP1 FNMSUB y01, atemp4, a6, y01 NOP2 FMADD2 xsum2, xtemp3, a4, xsum2 LFD a4, 7 * SIZE(AO1) FMADD y02, atemp3, a6, y02 LFD a6, 5 * SIZE(AO2) FMADD1 xsum3, xtemp4, a8, xsum3 LFD xtemp4, 7 * SIZE(XX) FNMSUB y03, atemp4, a8, y03 NOP2 FMADD2 xsum4, xtemp3, a8, xsum4 LFD xtemp3, 6 * SIZE(XX) FMADD y04, atemp3, a8, y04 LFD a8, 7 * SIZE(AO2) FMADD xsum1, xtemp1, a1, xsum1 STFD y01, 0 * SIZE(YY) FMADD y05, atemp1, a1, y05 NOP2 FMADD xsum2, xtemp2, a1, xsum2 STFD y02, 1 * SIZE(YY) FMADD y06, atemp2, a1, y06 LFD a1, 8 * SIZE(AO1) FMADD xsum3, xtemp1, a5, xsum3 STFD y03, 2 * SIZE(YY) FMADD y07, atemp1, a3, y07 NOP2 FMADD xsum4, xtemp2, a5, xsum4 STFD y04, 3 * SIZE(YY) FMADD y08, atemp2, a3, y08 NOP2 FMADD1 xsum1, xtemp2, a2, xsum1 LFD y01, 8 * SIZE(YY) FNMSUB y05, atemp2, a2, y05 NOP2 FMADD2 xsum2, xtemp1, a2, xsum2 LFD y02, 9 * SIZE(YY) FMADD y06, atemp1, a2, y06 LFD a2, 9 * SIZE(AO1) FMADD1 xsum3, xtemp2, a6, xsum3 LFD xtemp2, 9 * SIZE(XX) FNMSUB y07, atemp2, a4, y07 NOP2 FMADD2 xsum4, xtemp1, a6, xsum4 LFD xtemp1, 8 * SIZE(XX) FMADD y08, atemp1, a4, y08 NOP2 FMADD xsum1, xtemp3, a3, xsum1 LFD y03, 10 * SIZE(YY) FMADD y05, atemp3, a5, y05 NOP2 FMADD xsum2, xtemp4, a3, xsum2 LFD a3, 10 * SIZE(AO1) FMADD y06, atemp4, a5, y06 LFD a5, 8 * SIZE(AO2) FMADD xsum3, xtemp3, a7, xsum3 LFD y04, 11 * SIZE(YY) FMADD y07, atemp3, a7, y07 NOP2 FMADD xsum4, xtemp4, a7, xsum4 NOP1 FMADD y08, atemp4, a7, y08 LFD a7, 10 * SIZE(AO2) FMADD1 xsum1, xtemp4, a4, xsum1 NOP1 FNMSUB y05, atemp4, a6, y05 NOP2 FMADD2 xsum2, xtemp3, a4, xsum2 LFD a4, 11 * SIZE(AO1) FMADD y06, atemp3, a6, y06 LFD a6, 9 * SIZE(AO2) FMADD1 xsum3, xtemp4, a8, xsum3 LFD xtemp4, 11 * SIZE(XX) FNMSUB y07, atemp4, a8, y07 FMADD2 xsum4, xtemp3, a8, xsum4 LFD xtemp3, 10 * SIZE(XX) FMADD y08, atemp3, a8, y08 LFD a8, 11 * SIZE(AO2) STFD y05, 4 * SIZE(YY) STFD y06, 5 * SIZE(YY) STFD y07, 6 * SIZE(YY) STFD y08, 7 * SIZE(YY) addi AO1, AO1, 8 * SIZE addi AO2, AO2, 8 * SIZE addi XX, XX, 8 * SIZE addi YY, YY, 8 * SIZE .align 4LL(16): andi. r0, IS, 2 ble LL(18) FMADD xsum1, xtemp1, a1, xsum1 FMADD y01, atemp1, a1, y01 FMADD xsum2, xtemp2, a1, xsum2 FMADD y02, atemp2, a1, y02 FMADD xsum3, xtemp1, a5, xsum3 FMADD y03, atemp1, a3, y03 FMADD xsum4, xtemp2, a5, xsum4 FMADD y04, atemp2, a3, y04 FMADD1 xsum1, xtemp2, a2, xsum1 FNMSUB y01, atemp2, a2, y01 FMADD2 xsum2, xtemp1, a2, xsum2 FMADD y02, atemp1, a2, y02 FMADD1 xsum3, xtemp2, a6, xsum3 FNMSUB y03, atemp2, a4, y03 FMADD2 xsum4, xtemp1, a6, xsum4 FMADD y04, atemp1, a4, y04 FMADD xsum1, xtemp3, a3, xsum1 FMADD y01, atemp3, a5, y01 FMADD xsum2, xtemp4, a3, xsum2 FMADD y02, atemp4, a5, y02 FMADD xsum3, xtemp3, a7, xsum3 FMADD y03, atemp3, a7, y03 FMADD xsum4, xtemp4, a7, xsum4 FMADD y04, atemp4, a7, y04 FMADD1 xsum1, xtemp4, a4, xsum1 FNMSUB y01, atemp4, a6, y01 FMADD2 xsum2, xtemp3, a4, xsum2 FMADD y02, atemp3, a6, y02 FMADD1 xsum3, xtemp4, a8, xsum3 FNMSUB y03, atemp4, a8, y03 FMADD2 xsum4, xtemp3, a8, xsum4 FMADD y04, atemp3, a8, y04 STFD y01, 0 * SIZE(YY) STFD y02, 1 * SIZE(YY) STFD y03, 2 * SIZE(YY) STFD y04, 3 * SIZE(YY) LFD a1, 4 * SIZE(AO1) LFD a2, 5 * SIZE(AO1) LFD a5, 4 * SIZE(AO2) LFD a6, 5 * SIZE(AO2) LFD a7, 6 * SIZE(AO2) LFD a8, 7 * SIZE(AO2) LFD y01, 4 * SIZE(YY) LFD y02, 5 * SIZE(YY) LFD y03, 6 * SIZE(YY) LFD y04, 7 * SIZE(YY) addi YY, YY, 4 * SIZE .align 4LL(18): LFD y05, ALPHA_R LFD y06, ALPHA_I FMUL xtemp1, y05, xsum1 FMUL xtemp2, y06, xsum1 FMUL xtemp3, y05, xsum3 FMUL xtemp4, y06, xsum3 FNMSUB xsum1, y06, xsum2, xtemp1 FMADD xsum2, y05, xsum2, xtemp2 FNMSUB xsum3, y06, xsum4, xtemp3 FMADD xsum4, y05, xsum4, xtemp4 FMADD xsum1, atemp1, a1, xsum1 FMADD xsum2, atemp2, a1, xsum2 FMADD xsum3, atemp1, a5, xsum3 FMADD xsum4, atemp2, a5, xsum4#ifndef HEMV FMADD1 xsum1, atemp2, a2, xsum1 FMADD2 xsum2, atemp1, a2, xsum2#endif FMADD1 xsum3, atemp2, a6, xsum3 FMADD2 xsum4, atemp1, a6, xsum4 FMADD xsum1, atemp3, a5, xsum1 FMADD xsum2, atemp4, a5, xsum2 FMADD xsum3, atemp3, a7, xsum3 FMADD xsum4, atemp4, a7, xsum4 FNMSUB xsum1, atemp4, a6, xsum1 FMADD xsum2, atemp3, a6, xsum2#ifndef HEMV FNMSUB xsum3, atemp4, a8, xsum3 FMADD xsum4, atemp3, a8, xsum4#endif FADD y01, y01, xsum1 FADD y02, y02, xsum2 FADD y03, y03, xsum3 FADD y04, y04, xsum4 STFD y01, 0 * SIZE(YY) addi TEMP, IS, 4 STFD y02, 1 * SIZE(YY) addi IS, IS, 2 STFD y03, 2 * SIZE(YY) cmpw cr0, TEMP, M STFD y04, 3 * SIZE(YY) ble LL(11) .align 4 LL(20): andi. TEMP, M, 1 ble LL(990) mr AO1, A slwi TEMP, IS, ZBASE_SHIFT add TEMP, X, TEMP LFD y05, ALPHA_R LFD y06, ALPHA_I LFD xtemp1, 0 * SIZE(TEMP) LFD xtemp2, 1 * SIZE(TEMP) FMUL atemp1, y05, xtemp1 FMUL atemp2, y06, xtemp1 FNMSUB atemp1, y06, xtemp2, atemp1 FMADD atemp2, y05, xtemp2, atemp2 lfd xsum1, FZERO fmr xsum2, xsum1 mr XX, X mr YY, NEW_Y LFD a1, 0 * SIZE(AO1) LFD a2, 1 * SIZE(AO1) LFD xtemp1, 0 * SIZE(XX) LFD xtemp2, 1 * SIZE(XX) LFD y01, 0 * SIZE(YY) LFD y02, 1 * SIZE(YY) mtspr CTR, IS cmpwi cr0, IS, 0 ble LL(28) .align 4LL(22): FMADD xsum1, xtemp1, a1, xsum1 FMADD y01, atemp1, a1, y01 FMADD xsum2, xtemp2, a1, xsum2 FMADD y02, atemp2, a1, y02 LFD a1, 2 * SIZE(AO1) FMADD1 xsum1, xtemp2, a2, xsum1 LFD xtemp2, 3 * SIZE(XX) FNMSUB y01, atemp2, a2, y01 FMADD2 xsum2, xtemp1, a2, xsum2 LFD xtemp1, 2 * SIZE(XX) FMADD y02, atemp1, a2, y02 LFD a2, 3 * SIZE(AO1) addi AO1, AO1, 2 * SIZE addi XX, XX, 2 * SIZE addi YY, YY, 2 * SIZE STFD y01, -2 * SIZE(YY) LFD y01, 0 * SIZE(YY) STFD y02, -1 * SIZE(YY) LFD y02, 1 * SIZE(YY) bdnz LL(22) .align 4LL(28): LFD y05, ALPHA_R LFD y06, ALPHA_I FMUL xtemp1, y05, xsum1 FMUL xtemp2, y06, xsum1 FNMSUB xsum1, y06, xsum2, xtemp1 FMADD xsum2, y05, xsum2, xtemp2 FMADD xsum1, atemp1, a1, xsum1 FMADD xsum2, atemp2, a1, xsum2#ifndef HEMV FNMSUB xsum1, atemp2, a2, xsum1 FMADD xsum2, atemp1, a2, xsum2#endif FADD y01, y01, xsum1 FADD y02, y02, xsum2 STFD y01, 0 * SIZE(YY) STFD y02, 1 * SIZE(YY) .align 4 LL(990): cmpwi cr0, INCY, 2 * SIZE beq LL(999) mr YY, Y srawi. r0, M, 2 mtspr CTR, r0 ble LL(995) .align 4LL(991): LFD f0, 0 * SIZE(Y) LFD f1, 1 * SIZE(Y) add Y, Y, INCY LFD f2, 0 * SIZE(Y) LFD f3, 1 * SIZE(Y) add Y, Y, INCY LFD f4, 0 * SIZE(Y) LFD f5, 1 * SIZE(Y) add Y, Y, INCY LFD f6, 0 * SIZE(Y) LFD f7, 1 * SIZE(Y) add Y, Y, INCY LFD f8, 0 * SIZE(NEW_Y) LFD f9, 1 * SIZE(NEW_Y) LFD f10, 2 * SIZE(NEW_Y) LFD f11, 3 * SIZE(NEW_Y) LFD f12, 4 * SIZE(NEW_Y) LFD f13, 5 * SIZE(NEW_Y) LFD f14, 6 * SIZE(NEW_Y) LFD f15, 7 * SIZE(NEW_Y) addi NEW_Y, NEW_Y, 8 * SIZE FADD f8, f8, f0 FADD f9, f9, f1 FADD f10, f10, f2 FADD f11, f11, f3 FADD f12, f12, f4 FADD f13, f13, f5 FADD f14, f14, f6 FADD f15, f15, f7 STFD f8, 0 * SIZE(YY) STFD f9, 1 * SIZE(YY) add YY, YY, INCY STFD f10, 0 * SIZE(YY) STFD f11, 1 * SIZE(YY) add YY, YY, INCY STFD f12, 0 * SIZE(YY) STFD f13, 1 * SIZE(YY) add YY, YY, INCY STFD f14, 0 * SIZE(YY) STFD f15, 1 * SIZE(YY) add YY, YY, INCY bdnz LL(991) .align 4LL(995): andi. J, M, 2 ble LL(996) LFD f0, 0 * SIZE(Y) LFD f1, 1 * SIZE(Y) add Y, Y, INCY LFD f2, 0 * SIZE(Y) LFD f3, 1 * SIZE(Y) add Y, Y, INCY LFD f8, 0 * SIZE(NEW_Y) LFD f9, 1 * SIZE(NEW_Y) LFD f10, 2 * SIZE(NEW_Y) LFD f11, 3 * SIZE(NEW_Y) addi NEW_Y, NEW_Y, 4 * SIZE FADD f8, f8, f0 FADD f9, f9, f1 FADD f10, f10, f2 FADD f11, f11, f3 STFD f8, 0 * SIZE(YY) STFD f9, 1 * SIZE(YY) add YY, YY, INCY STFD f10, 0 * SIZE(YY) STFD f11, 1 * SIZE(YY) add YY, YY, INCY .align 4LL(996): andi. J, M, 1 ble LL(999) LFD f0, 0 * SIZE(Y) LFD f1, 1 * SIZE(Y) LFD f8, 0 * SIZE(NEW_Y) LFD f9, 1 * SIZE(NEW_Y) FADD f8, f8, f0 FADD f9, f9, f1 STFD f8, 0 * SIZE(YY) STFD f9, 1 * SIZE(YY) .align 4LL(999): li r3, 0 lfd f14, 0(SP) lfd f15, 8(SP) lfd f16, 16(SP) lfd f17, 24(SP) lfd f18, 32(SP) lfd f19, 40(SP) lfd f20, 48(SP) lfd f21, 56(SP) lfd f22, 64(SP) lfd f23, 72(SP) lfd f24, 80(SP) lfd f25, 88(SP) lfd f26, 96(SP) lfd f27, 104(SP) lfd f28, 112(SP) lfd f29, 120(SP) lfd f30, 128(SP) lfd f31, 136(SP)#ifdef __64BIT__ ld r14, 144(SP) ld r15, 152(SP) ld r16, 160(SP) ld r17, 168(SP) ld r18, 176(SP) ld r19, 184(SP) ld r20, 192(SP) ld r21, 200(SP) ld r22, 208(SP) ld r23, 216(SP) ld r24, 224(SP) ld r25, 232(SP) ld r26, 240(SP) ld r27, 248(SP)#else lwz r14, 144(SP) lwz r15, 148(SP) lwz r16, 152(SP) lwz r17, 156(SP) lwz r18, 160(SP) lwz r19, 164(SP) lwz r20, 168(SP) lwz r21, 172(SP) lwz r22, 176(SP) lwz r23, 180(SP) lwz r24, 184(SP) lwz r25, 188(SP) lwz r26, 192(SP) lwz r27, 196(SP)#endif addi SP, SP, STACKSIZE blr EPILOGUE#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -