⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zsymv_l.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 3 页
字号:
	FMADD1	xsum3, xtemp4, a8,  xsum3	LFD	xtemp4,  3 * SIZE(XX)	FNMSUB	y07, atemp4, a8, y07	NOP2	FMADD2	xsum4, xtemp3, a8,  xsum4	LFD	xtemp3,  2 * SIZE(XX)	FMADD	y08, atemp3, a8, y08	LFD	a8,  3 * SIZE(AO2)	STFD	y05, -4 * SIZE(YY)	STFD	y06, -3 * SIZE(YY)	STFD	y07, -2 * SIZE(YY)	STFD	y08, -1 * SIZE(YY)	.align 4LL(15):	andi.	r0,  TEMP, 4	ble	LL(16)	FMADD	xsum1, xtemp1, a1,  xsum1	NOP1	FMADD	y01, atemp1, a1, y01	NOP2	FMADD	xsum2, xtemp2, a1,  xsum2	NOP1	FMADD	y02, atemp2, a1, y02	LFD	a1,  4 * SIZE(AO1)	FMADD	xsum3, xtemp1, a5,  xsum3	NOP1	FMADD	y03, atemp1, a3, y03	NOP2	FMADD	xsum4, xtemp2, a5,  xsum4	NOP1	FMADD	y04, atemp2, a3, y04	NOP2	FMADD1	xsum1, xtemp2, a2,  xsum1	LFD	y05,  4 * SIZE(YY)	FNMSUB	y01, atemp2, a2, y01	NOP2	FMADD2	xsum2, xtemp1, a2,  xsum2	LFD	y06,  5 * SIZE(YY)	FMADD	y02, atemp1, a2, y02	LFD	a2,  5 * SIZE(AO1)	FMADD1	xsum3, xtemp2, a6,  xsum3	LFD	xtemp2,  5 * SIZE(XX)	FNMSUB	y03, atemp2, a4, y03	NOP2	FMADD2	xsum4, xtemp1, a6,  xsum4	LFD	xtemp1,  4 * SIZE(XX)	FMADD	y04, atemp1, a4, y04	NOP2	FMADD	xsum1, xtemp3, a3,  xsum1	LFD	y07,  6 * SIZE(YY)	FMADD	y01, atemp3, a5, y01	NOP2	FMADD	xsum2, xtemp4, a3,  xsum2	LFD	a3,  6 * SIZE(AO1)	FMADD	y02, atemp4, a5, y02	LFD	a5,  4 * SIZE(AO2)	FMADD	xsum3, xtemp3, a7,  xsum3	LFD	y08,  7 * SIZE(YY)	FMADD	y03, atemp3, a7, y03	NOP2	FMADD	xsum4, xtemp4, a7,  xsum4	NOP1	FMADD	y04, atemp4, a7, y04	LFD	a7,  6 * SIZE(AO2)	FMADD1	xsum1, xtemp4, a4,  xsum1	NOP1	FNMSUB	y01, atemp4, a6, y01	NOP2	FMADD2	xsum2, xtemp3, a4,  xsum2	LFD	a4,  7 * SIZE(AO1)	FMADD	y02, atemp3, a6, y02	LFD	a6,  5 * SIZE(AO2)	FMADD1	xsum3, xtemp4, a8,  xsum3	LFD	xtemp4,  7 * SIZE(XX)	FNMSUB	y03, atemp4, a8, y03	NOP2	FMADD2	xsum4, xtemp3, a8,  xsum4	LFD	xtemp3,  6 * SIZE(XX)	FMADD	y04, atemp3, a8, y04	LFD	a8,  7 * SIZE(AO2)	FMADD	xsum1, xtemp1, a1,  xsum1	STFD	y01,  0 * SIZE(YY)	FMADD	y05, atemp1, a1, y05	NOP2	FMADD	xsum2, xtemp2, a1,  xsum2	STFD	y02,  1 * SIZE(YY)	FMADD	y06, atemp2, a1, y06	LFD	a1,  8 * SIZE(AO1)	FMADD	xsum3, xtemp1, a5,  xsum3	STFD	y03,  2 * SIZE(YY)	FMADD	y07, atemp1, a3, y07	NOP2	FMADD	xsum4, xtemp2, a5,  xsum4	STFD	y04,  3 * SIZE(YY)	FMADD	y08, atemp2, a3, y08	NOP2	FMADD1	xsum1, xtemp2, a2,  xsum1	LFD	y01,  8 * SIZE(YY)	FNMSUB	y05, atemp2, a2, y05	NOP2	FMADD2	xsum2, xtemp1, a2,  xsum2	LFD	y02,  9 * SIZE(YY)	FMADD	y06, atemp1, a2, y06	LFD	a2,  9 * SIZE(AO1)	FMADD1	xsum3, xtemp2, a6,  xsum3	LFD	xtemp2,  9 * SIZE(XX)	FNMSUB	y07, atemp2, a4, y07	NOP2	FMADD2	xsum4, xtemp1, a6,  xsum4	LFD	xtemp1,  8 * SIZE(XX)	FMADD	y08, atemp1, a4, y08	NOP2	FMADD	xsum1, xtemp3, a3,  xsum1	LFD	y03, 10 * SIZE(YY)	FMADD	y05, atemp3, a5, y05	NOP2	FMADD	xsum2, xtemp4, a3,  xsum2	LFD	a3, 10 * SIZE(AO1)	FMADD	y06, atemp4, a5, y06	LFD	a5,  8 * SIZE(AO2)	FMADD	xsum3, xtemp3, a7,  xsum3	LFD	y04, 11 * SIZE(YY)	FMADD	y07, atemp3, a7, y07	NOP2	FMADD	xsum4, xtemp4, a7,  xsum4	addi	YY, YY, 8 * SIZE	FMADD	y08, atemp4, a7, y08	LFD	a7, 10 * SIZE(AO2)	FMADD1	xsum1, xtemp4, a4,  xsum1	addi	AO2, AO2, 8 * SIZE	FNMSUB	y05, atemp4, a6, y05	addi	XX, XX, 8 * SIZE	FMADD2	xsum2, xtemp3, a4,  xsum2	LFD	a4, 11 * SIZE(AO1)	FMADD	y06, atemp3, a6, y06	LFD	a6,  1 * SIZE(AO2)	FMADD1	xsum3, xtemp4, a8,  xsum3	LFD	xtemp4,  3 * SIZE(XX)	FNMSUB	y07, atemp4, a8, y07	addi	AO1, AO1, 8 * SIZE	FMADD2	xsum4, xtemp3, a8,  xsum4	LFD	xtemp3,  2 * SIZE(XX)	FMADD	y08, atemp3, a8, y08	LFD	a8,  3 * SIZE(AO2)	STFD	y05, -4 * SIZE(YY)	STFD	y06, -3 * SIZE(YY)	STFD	y07, -2 * SIZE(YY)	STFD	y08, -1 * SIZE(YY)	.align 4LL(16):	andi.	r0,  TEMP, 2	ble	LL(17)	FMADD	xsum1, xtemp1, a1,  xsum1	NOP1	FMADD	y01, atemp1, a1, y01	NOP2	FMADD	xsum2, xtemp2, a1,  xsum2	NOP1	FMADD	y02, atemp2, a1, y02	LFD	a1,  4 * SIZE(AO1)	FMADD	xsum3, xtemp1, a5,  xsum3	FMADD	y03, atemp1, a3, y03	FMADD	xsum4, xtemp2, a5,  xsum4	FMADD	y04, atemp2, a3, y04	FMADD1	xsum1, xtemp2, a2,  xsum1	NOP1	FNMSUB	y01, atemp2, a2, y01	NOP2	FMADD2	xsum2, xtemp1, a2,  xsum2	NOP1	FMADD	y02, atemp1, a2, y02	LFD	a2,  5 * SIZE(AO1)	FMADD1	xsum3, xtemp2, a6,  xsum3	LFD	xtemp2,  5 * SIZE(XX)	FNMSUB	y03, atemp2, a4, y03	NOP2	FMADD2	xsum4, xtemp1, a6,  xsum4	LFD	xtemp1,  4 * SIZE(XX)	FMADD	y04, atemp1, a4, y04	NOP2	FMADD	xsum1, xtemp3, a3,  xsum1	NOP1	FMADD	y01, atemp3, a5, y01	NOP2	FMADD	xsum2, xtemp4, a3,  xsum2	NOP1	FMADD	y02, atemp4, a5, y02	LFD	a5,  4 * SIZE(AO2)	FMADD	xsum3, xtemp3, a7,  xsum3	FMADD	y03, atemp3, a7, y03	FMADD	xsum4, xtemp4, a7,  xsum4	FMADD	y04, atemp4, a7, y04	FMADD1	xsum1, xtemp4, a4,  xsum1	NOP1	FNMSUB	y01, atemp4, a6, y01	NOP2	FMADD2	xsum2, xtemp3, a4,  xsum2	NOP1	FMADD	y02, atemp3, a6, y02	LFD	a6,  5 * SIZE(AO2)	FMADD1	xsum3, xtemp4, a8,  xsum3	addi	AO1, AO1, 4 * SIZE	FNMSUB	y03, atemp4, a8, y03	addi	AO2, AO2, 4 * SIZE	FMADD2	xsum4, xtemp3, a8,  xsum4	addi	YY,  YY, 4 * SIZE	FMADD	y04, atemp3, a8, y04	NOP2	STFD	y01, -4 * SIZE(YY)	LFD	y01,  0 * SIZE(YY)	STFD	y02, -3 * SIZE(YY)	LFD	y02,  1 * SIZE(YY)	STFD	y03, -2 * SIZE(YY)	STFD	y04, -1 * SIZE(YY)	.align 4LL(17):	andi.	r0,  M, 1	ble	LL(18)	FMADD	xsum1, xtemp1, a1,  xsum1	FMADD	y01, atemp1, a1, y01	FMADD	xsum2, xtemp2, a1,  xsum2	FMADD	y02, atemp2, a1, y02	FMADD	xsum3, xtemp1, a5,  xsum3	FNMSUB	y01, atemp2, a2, y01	FMADD	xsum4, xtemp2, a5,  xsum4	FMADD	y02, atemp1, a2, y02	FMADD1	xsum1, xtemp2, a2,  xsum1	FMADD	y01, atemp3, a5, y01	FMADD2	xsum2, xtemp1, a2,  xsum2	FMADD	y02, atemp4, a5, y02	FMADD1	xsum3, xtemp2, a6,  xsum3	FNMSUB	y01, atemp4, a6, y01	FMADD2	xsum4, xtemp1, a6,  xsum4	FMADD	y02, atemp3, a6, y02	STFD	y01,  0 * SIZE(YY)	STFD	y02,  1 * SIZE(YY)	STFD	y03,  2 * SIZE(YY)	STFD	y04,  3 * SIZE(YY)	.align 4LL(18):	LFD	y05, ALPHA_R	LFD	y06, ALPHA_I	slwi	TEMP,  IS,  ZBASE_SHIFT	add	YY, NEW_Y, TEMP	LFD	y01,  0 * SIZE(YY)	LFD	y02,  1 * SIZE(YY)	LFD	y03,  2 * SIZE(YY)	LFD	y04,  3 * SIZE(YY)	FMUL	xtemp1, y05, xsum1	FMUL	xtemp2, y06, xsum1	FMUL	xtemp3, y05, xsum3	FMUL	xtemp4, y06, xsum3	FNMSUB	xsum1, y06, xsum2, xtemp1	FMADD	xsum2, y05, xsum2, xtemp2	FNMSUB	xsum3, y06, xsum4, xtemp3	FMADD	xsum4, y05, xsum4, xtemp4	FADD	y01, y01, xsum1	FADD	y02, y02, xsum2	FADD	y03, y03, xsum3	FADD	y04, y04, xsum4	STFD	y01,  0 * SIZE(YY)	addi	TEMP, IS, 4	STFD	y02,  1 * SIZE(YY)	addi	IS,   IS, 2	STFD	y03,  2 * SIZE(YY)	cmpw	cr0, TEMP, M	STFD	y04,  3 * SIZE(YY)	ble	LL(11)	.align 4	LL(20):	andi.	TEMP, M, 1	ble	LL(990)	slwi	TEMP,  IS,  ZBASE_SHIFT	add	XX, X,     TEMP	add	YY, NEW_Y, TEMP	LFD	y05, ALPHA_R	LFD	y06, ALPHA_I	LFD	atemp1, 0 * SIZE(XX)	LFD	atemp2, 1 * SIZE(XX)	LFD	a1,  0 * SIZE(A)	LFD	a2,  1 * SIZE(A)	FMUL	xsum1, atemp1, a1	FMUL	xsum2, atemp2, a1#ifndef HEMV	FNMSUB	xsum1, atemp2, a2, xsum1	FMADD	xsum2, atemp1, a2, xsum2#endif	FMUL	xtemp1, y05, atemp1	FMUL	xtemp2, y06, atemp1	FNMSUB	atemp1, y06, atemp2, xtemp1	FMADD	atemp2, y05, atemp2, xtemp2	LFD	y05, ALPHA_R	LFD	y06, ALPHA_I	LFD	y01,  0 * SIZE(YY)	LFD	y02,  1 * SIZE(YY)	FMUL	xtemp1, y05, xsum1	FMUL	xtemp2, y06, xsum1	FNMSUB	xsum1, y06, xsum2, xtemp1	FMADD	xsum2, y05, xsum2, xtemp2	FADD	y01, y01, xsum1	FADD	y02, y02, xsum2	STFD	y01,  0 * SIZE(YY)	STFD	y02,  1 * SIZE(YY)	.align 4	LL(990):	cmpwi	cr0, INCY, 2 * SIZE	beq	LL(999)	mr	YY, Y	srawi.	r0, M, 2	mtspr	CTR, r0	ble	LL(995)	.align 4LL(991):	LFD	f0,  0 * SIZE(Y)	LFD	f1,  1 * SIZE(Y)	add	Y, Y, INCY	LFD	f2,  0 * SIZE(Y)	LFD	f3,  1 * SIZE(Y)	add	Y, Y, INCY	LFD	f4,  0 * SIZE(Y)	LFD	f5,  1 * SIZE(Y)	add	Y, Y, INCY	LFD	f6,  0 * SIZE(Y)	LFD	f7,  1 * SIZE(Y)	add	Y, Y, INCY	LFD	f8,   0 * SIZE(NEW_Y)	LFD	f9,   1 * SIZE(NEW_Y)	LFD	f10,  2 * SIZE(NEW_Y)	LFD	f11,  3 * SIZE(NEW_Y)	LFD	f12,  4 * SIZE(NEW_Y)	LFD	f13,  5 * SIZE(NEW_Y)	LFD	f14,  6 * SIZE(NEW_Y)	LFD	f15,  7 * SIZE(NEW_Y)	addi	NEW_Y, NEW_Y, 8 * SIZE	FADD	f8,  f8,  f0	FADD	f9,  f9,  f1	FADD	f10, f10, f2	FADD	f11, f11, f3	FADD	f12, f12, f4	FADD	f13, f13, f5	FADD	f14, f14, f6	FADD	f15, f15, f7	STFD	f8,  0 * SIZE(YY)	STFD	f9,  1 * SIZE(YY)	add	YY, YY, INCY	STFD	f10, 0 * SIZE(YY)	STFD	f11, 1 * SIZE(YY)	add	YY, YY, INCY	STFD	f12, 0 * SIZE(YY)	STFD	f13, 1 * SIZE(YY)	add	YY, YY, INCY	STFD	f14, 0 * SIZE(YY)	STFD	f15, 1 * SIZE(YY)	add	YY, YY, INCY	bdnz	LL(991)	.align 4LL(995):	andi.	J, M, 2	ble	LL(996)	LFD	f0,  0 * SIZE(Y)	LFD	f1,  1 * SIZE(Y)	add	Y, Y, INCY	LFD	f2,  0 * SIZE(Y)	LFD	f3,  1 * SIZE(Y)	add	Y, Y, INCY	LFD	f8,   0 * SIZE(NEW_Y)	LFD	f9,   1 * SIZE(NEW_Y)	LFD	f10,  2 * SIZE(NEW_Y)	LFD	f11,  3 * SIZE(NEW_Y)	addi	NEW_Y, NEW_Y, 4 * SIZE	FADD	f8,  f8,  f0	FADD	f9,  f9,  f1	FADD	f10, f10, f2	FADD	f11, f11, f3	STFD	f8,  0 * SIZE(YY)	STFD	f9,  1 * SIZE(YY)	add	YY, YY, INCY	STFD	f10, 0 * SIZE(YY)	STFD	f11, 1 * SIZE(YY)	add	YY, YY, INCY	.align 4LL(996):	andi.	J, M, 1	ble	LL(999)	LFD	f0,  0 * SIZE(Y)	LFD	f1,  1 * SIZE(Y)	LFD	f8,   0 * SIZE(NEW_Y)	LFD	f9,   1 * SIZE(NEW_Y)	FADD	f8,  f8,  f0	FADD	f9,  f9,  f1	STFD	f8,  0 * SIZE(YY)	STFD	f9,  1 * SIZE(YY)	.align 4LL(999):	li	r3, 0	lfd	f14,     0(SP)	lfd	f15,     8(SP)	lfd	f16,    16(SP)	lfd	f17,    24(SP)	lfd	f18,    32(SP)	lfd	f19,    40(SP)	lfd	f20,    48(SP)	lfd	f21,    56(SP)	lfd	f22,    64(SP)	lfd	f23,    72(SP)	lfd	f24,    80(SP)	lfd	f25,    88(SP)	lfd	f26,    96(SP)	lfd	f27,   104(SP)	lfd	f28,   112(SP)	lfd	f29,   120(SP)	lfd	f30,   128(SP)	lfd	f31,   136(SP)#ifdef __64BIT__	ld	r14,   144(SP)	ld	r15,   152(SP)	ld	r16,   160(SP)	ld	r17,   168(SP)	ld	r18,   176(SP)	ld	r19,   184(SP)	ld	r20,   192(SP)	ld	r21,   200(SP)	ld	r22,   208(SP)	ld	r23,   216(SP)	ld	r24,   224(SP)	ld	r25,   232(SP)	ld	r26,   240(SP)	ld	r27,   248(SP)#else	lwz	r14,   144(SP)	lwz	r15,   148(SP)	lwz	r16,   152(SP)	lwz	r17,   156(SP)	lwz	r18,   160(SP)	lwz	r19,   164(SP)	lwz	r20,   168(SP)	lwz	r21,   172(SP)	lwz	r22,   176(SP)	lwz	r23,   180(SP)	lwz	r24,   184(SP)	lwz	r25,   188(SP)	lwz	r26,   192(SP)	lwz	r27,   196(SP)#endif	addi	SP, SP, STACKSIZE	blr	EPILOGUE#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -