⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gemv_n.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 5 页
字号:
	LFD	a4, 11 * SIZE(AO6)	FMADD	y05, alpha6, a5, y05	FMADD	y06, alpha6, a6, y06	FMADD	y07, alpha6, a7, y07	FMADD	y08, alpha6, a8, y08	LFD	a5, 12 * SIZE(AO6)	LFD	a6, 13 * SIZE(AO6)	LFD	a7, 14 * SIZE(AO6)	LFD	a8, 15 * SIZE(AO6)	addi	AO6, AO6, 16 * SIZE	nop	nop	PREFETCH_A6	FMADD	y09, alpha6, a1, y09	FMADD	y10, alpha6, a2, y10	FMADD	y11, alpha6, a3, y11	FMADD	y12, alpha6, a4, y12	LFD	a1,  0 * SIZE(AO7)	LFD	a2,  1 * SIZE(AO7)	LFD	a3,  2 * SIZE(AO7)	LFD	a4,  3 * SIZE(AO7)	FMADD	y13, alpha6, a5, y13	FMADD	y14, alpha6, a6, y14	FMADD	y15, alpha6, a7, y15	FMADD	y16, alpha6, a8, y16	LFD	a5,  4 * SIZE(AO7)	LFD	a6,  5 * SIZE(AO7)	LFD	a7,  6 * SIZE(AO7)	LFD	a8,  7 * SIZE(AO7)	FMADD	y01, alpha7, a1, y01	FMADD	y02, alpha7, a2, y02	FMADD	y03, alpha7, a3, y03	FMADD	y04, alpha7, a4, y04	LFD	a1,  8 * SIZE(AO7)	LFD	a2,  9 * SIZE(AO7)	LFD	a3, 10 * SIZE(AO7)	LFD	a4, 11 * SIZE(AO7)	FMADD	y05, alpha7, a5, y05	FMADD	y06, alpha7, a6, y06	FMADD	y07, alpha7, a7, y07	FMADD	y08, alpha7, a8, y08	LFD	a5, 12 * SIZE(AO7)	LFD	a6, 13 * SIZE(AO7)	LFD	a7, 14 * SIZE(AO7)	LFD	a8, 15 * SIZE(AO7)	addi	AO7, AO7, 16 * SIZE	nop	nop	PREFETCH_A7	FMADD	y09, alpha7, a1, y09	FMADD	y10, alpha7, a2, y10	FMADD	y11, alpha7, a3, y11	FMADD	y12, alpha7, a4, y12	LFD	a1,  0 * SIZE(AO8)	LFD	a2,  1 * SIZE(AO8)	LFD	a3,  2 * SIZE(AO8)	LFD	a4,  3 * SIZE(AO8)	FMADD	y13, alpha7, a5, y13	FMADD	y14, alpha7, a6, y14	FMADD	y15, alpha7, a7, y15	FMADD	y16, alpha7, a8, y16	LFD	a5,  4 * SIZE(AO8)	LFD	a6,  5 * SIZE(AO8)	LFD	a7,  6 * SIZE(AO8)	LFD	a8,  7 * SIZE(AO8)	FMADD	y01, alpha8, a1, y01	FMADD	y02, alpha8, a2, y02	FMADD	y03, alpha8, a3, y03	FMADD	y04, alpha8, a4, y04	LFD	a1,  8 * SIZE(AO8)	LFD	a2,  9 * SIZE(AO8)	LFD	a3, 10 * SIZE(AO8)	LFD	a4, 11 * SIZE(AO8)	FMADD	y05, alpha8, a5, y05	FMADD	y06, alpha8, a6, y06	FMADD	y07, alpha8, a7, y07	FMADD	y08, alpha8, a8, y08	LFD	a5, 12 * SIZE(AO8)	LFD	a6, 13 * SIZE(AO8)	LFD	a7, 14 * SIZE(AO8)	LFD	a8, 15 * SIZE(AO8)	addi	AO8, AO8, 16 * SIZE	nop	nop	PREFETCH_A8	FMADD	y09, alpha8, a1, y09	FMADD	y10, alpha8, a2, y10	FMADD	y11, alpha8, a3, y11	FMADD	y12, alpha8, a4, y12	LFD	a1,  0 * SIZE(AO1)	LFD	a2,  1 * SIZE(AO1)	LFD	a3,  2 * SIZE(AO1)	LFD	a4,  3 * SIZE(AO1)	FMADD	y13, alpha8, a5, y13	FMADD	y14, alpha8, a6, y14	FMADD	y15, alpha8, a7, y15	FMADD	y16, alpha8, a8, y16	LFD	a5,  4 * SIZE(AO1)	LFD	a6,  5 * SIZE(AO1)	LFD	a7,  6 * SIZE(AO1)	LFD	a8,  7 * SIZE(AO1)	STFD	y01,  0 * SIZE(Y1)	STFD	y02,  1 * SIZE(Y1)	STFD	y03,  2 * SIZE(Y1)	STFD	y04,  3 * SIZE(Y1)	LFD	y01, 16 * SIZE(Y1)	LFD	y02, 17 * SIZE(Y1)	LFD	y03, 18 * SIZE(Y1)	LFD	y04, 19 * SIZE(Y1)	PREFETCH_Y	bdz	LL(13)	.align 4LL(12):	FMADD	y01, alpha1, a1, y01	FMADD	y02, alpha1, a2, y02	FMADD	y03, alpha1, a3, y03	FMADD	y04, alpha1, a4, y04	LFD	a1,  8 * SIZE(AO1)	LFD	a2,  9 * SIZE(AO1)	LFD	a3, 10 * SIZE(AO1)	LFD	a4, 11 * SIZE(AO1)	STFD	y05,  4 * SIZE(Y1)	STFD	y06,  5 * SIZE(Y1)	STFD	y07,  6 * SIZE(Y1)	STFD	y08,  7 * SIZE(Y1)	LFD	y05, 20 * SIZE(Y1)	LFD	y06, 21 * SIZE(Y1)	LFD	y07, 22 * SIZE(Y1)	LFD	y08, 23 * SIZE(Y1)	FMADD	y05, alpha1, a5, y05	FMADD	y06, alpha1, a6, y06	FMADD	y07, alpha1, a7, y07	FMADD	y08, alpha1, a8, y08	LFD	a5, 12 * SIZE(AO1)	LFD	a6, 13 * SIZE(AO1)	LFD	a7, 14 * SIZE(AO1)	LFD	a8, 15 * SIZE(AO1)	STFD	y09,  8 * SIZE(Y1)	STFD	y10,  9 * SIZE(Y1)	STFD	y11, 10 * SIZE(Y1)	STFD	y12, 11 * SIZE(Y1)	LFD	y09, 24 * SIZE(Y1)	LFD	y10, 25 * SIZE(Y1)	LFD	y11, 26 * SIZE(Y1)	LFD	y12, 27 * SIZE(Y1)	FMADD	y09, alpha1, a1, y09	FMADD	y10, alpha1, a2, y10	FMADD	y11, alpha1, a3, y11	FMADD	y12, alpha1, a4, y12	LFD	a1,  0 * SIZE(AO2)	LFD	a2,  1 * SIZE(AO2)	LFD	a3,  2 * SIZE(AO2)	LFD	a4,  3 * SIZE(AO2)	STFD	y13, 12 * SIZE(Y1)	STFD	y14, 13 * SIZE(Y1)	STFD	y15, 14 * SIZE(Y1)	STFD	y16, 15 * SIZE(Y1)	LFD	y13, 28 * SIZE(Y1)	LFD	y14, 29 * SIZE(Y1)	LFD	y15, 30 * SIZE(Y1)	LFD	y16, 31 * SIZE(Y1)	FMADD	y13, alpha1, a5, y13	FMADD	y14, alpha1, a6, y14	FMADD	y15, alpha1, a7, y15	FMADD	y16, alpha1, a8, y16	LFD	a5,  4 * SIZE(AO2)	LFD	a6,  5 * SIZE(AO2)	LFD	a7,  6 * SIZE(AO2)	LFD	a8,  7 * SIZE(AO2)	FMADD	y01, alpha2, a1, y01	FMADD	y02, alpha2, a2, y02	FMADD	y03, alpha2, a3, y03	FMADD	y04, alpha2, a4, y04	LFD	a1,  8 * SIZE(AO2)	LFD	a2,  9 * SIZE(AO2)	LFD	a3, 10 * SIZE(AO2)	LFD	a4, 11 * SIZE(AO2)	FMADD	y05, alpha2, a5, y05	FMADD	y06, alpha2, a6, y06	FMADD	y07, alpha2, a7, y07	FMADD	y08, alpha2, a8, y08	LFD	a5, 12 * SIZE(AO2)	LFD	a6, 13 * SIZE(AO2)	LFD	a7, 14 * SIZE(AO2)	LFD	a8, 15 * SIZE(AO2)	FMADD	y09, alpha2, a1, y09	FMADD	y10, alpha2, a2, y10	FMADD	y11, alpha2, a3, y11	FMADD	y12, alpha2, a4, y12	LFD	a1,  0 * SIZE(AO3)	LFD	a2,  1 * SIZE(AO3)	LFD	a3,  2 * SIZE(AO3)	LFD	a4,  3 * SIZE(AO3)	FMADD	y13, alpha2, a5, y13	FMADD	y14, alpha2, a6, y14	FMADD	y15, alpha2, a7, y15	FMADD	y16, alpha2, a8, y16	LFD	a5,  4 * SIZE(AO3)	LFD	a6,  5 * SIZE(AO3)	LFD	a7,  6 * SIZE(AO3)	LFD	a8,  7 * SIZE(AO3)	FMADD	y01, alpha3, a1, y01	FMADD	y02, alpha3, a2, y02	FMADD	y03, alpha3, a3, y03	FMADD	y04, alpha3, a4, y04	LFD	a1,  8 * SIZE(AO3)	LFD	a2,  9 * SIZE(AO3)	LFD	a3, 10 * SIZE(AO3)	LFD	a4, 11 * SIZE(AO3)	FMADD	y05, alpha3, a5, y05	FMADD	y06, alpha3, a6, y06	FMADD	y07, alpha3, a7, y07	FMADD	y08, alpha3, a8, y08	LFD	a5, 12 * SIZE(AO3)	LFD	a6, 13 * SIZE(AO3)	LFD	a7, 14 * SIZE(AO3)	LFD	a8, 15 * SIZE(AO3)	FMADD	y09, alpha3, a1, y09	FMADD	y10, alpha3, a2, y10	FMADD	y11, alpha3, a3, y11	FMADD	y12, alpha3, a4, y12	LFD	a1,  0 * SIZE(AO4)	LFD	a2,  1 * SIZE(AO4)	LFD	a3,  2 * SIZE(AO4)	LFD	a4,  3 * SIZE(AO4)	FMADD	y13, alpha3, a5, y13	FMADD	y14, alpha3, a6, y14	FMADD	y15, alpha3, a7, y15	FMADD	y16, alpha3, a8, y16	LFD	a5,  4 * SIZE(AO4)	LFD	a6,  5 * SIZE(AO4)	LFD	a7,  6 * SIZE(AO4)	LFD	a8,  7 * SIZE(AO4)	FMADD	y01, alpha4, a1, y01	FMADD	y02, alpha4, a2, y02	FMADD	y03, alpha4, a3, y03	FMADD	y04, alpha4, a4, y04	LFD	a1,  8 * SIZE(AO4)	LFD	a2,  9 * SIZE(AO4)	LFD	a3, 10 * SIZE(AO4)	LFD	a4, 11 * SIZE(AO4)	FMADD	y05, alpha4, a5, y05	FMADD	y06, alpha4, a6, y06	FMADD	y07, alpha4, a7, y07	FMADD	y08, alpha4, a8, y08	LFD	a5, 12 * SIZE(AO4)	LFD	a6, 13 * SIZE(AO4)	LFD	a7, 14 * SIZE(AO4)	LFD	a8, 15 * SIZE(AO4)	addi	AO1, AO1, 16 * SIZE	addi	AO2, AO2, 16 * SIZE	addi	AO3, AO3, 16 * SIZE	addi	AO4, AO4, 16 * SIZE	PREFETCH_A1	PREFETCH_A2	PREFETCH_A3	PREFETCH_A4	FMADD	y09, alpha4, a1, y09	FMADD	y10, alpha4, a2, y10	FMADD	y11, alpha4, a3, y11	FMADD	y12, alpha4, a4, y12	LFD	a1,  0 * SIZE(AO5)	LFD	a2,  1 * SIZE(AO5)	LFD	a3,  2 * SIZE(AO5)	LFD	a4,  3 * SIZE(AO5)	FMADD	y13, alpha4, a5, y13	FMADD	y14, alpha4, a6, y14	FMADD	y15, alpha4, a7, y15	FMADD	y16, alpha4, a8, y16	LFD	a5,  4 * SIZE(AO5)	LFD	a6,  5 * SIZE(AO5)	LFD	a7,  6 * SIZE(AO5)	LFD	a8,  7 * SIZE(AO5)	FMADD	y01, alpha5, a1, y01	FMADD	y02, alpha5, a2, y02	FMADD	y03, alpha5, a3, y03	FMADD	y04, alpha5, a4, y04	LFD	a1,  8 * SIZE(AO5)	LFD	a2,  9 * SIZE(AO5)	LFD	a3, 10 * SIZE(AO5)	LFD	a4, 11 * SIZE(AO5)	FMADD	y05, alpha5, a5, y05	FMADD	y06, alpha5, a6, y06	FMADD	y07, alpha5, a7, y07	FMADD	y08, alpha5, a8, y08	LFD	a5, 12 * SIZE(AO5)	LFD	a6, 13 * SIZE(AO5)	LFD	a7, 14 * SIZE(AO5)	LFD	a8, 15 * SIZE(AO5)	FMADD	y09, alpha5, a1, y09	FMADD	y10, alpha5, a2, y10	FMADD	y11, alpha5, a3, y11	FMADD	y12, alpha5, a4, y12	LFD	a1,  0 * SIZE(AO6)	LFD	a2,  1 * SIZE(AO6)	LFD	a3,  2 * SIZE(AO6)	LFD	a4,  3 * SIZE(AO6)	FMADD	y13, alpha5, a5, y13	FMADD	y14, alpha5, a6, y14	FMADD	y15, alpha5, a7, y15	FMADD	y16, alpha5, a8, y16	LFD	a5,  4 * SIZE(AO6)	LFD	a6,  5 * SIZE(AO6)	LFD	a7,  6 * SIZE(AO6)	LFD	a8,  7 * SIZE(AO6)	FMADD	y01, alpha6, a1, y01	FMADD	y02, alpha6, a2, y02	FMADD	y03, alpha6, a3, y03	FMADD	y04, alpha6, a4, y04	LFD	a1,  8 * SIZE(AO6)	LFD	a2,  9 * SIZE(AO6)	LFD	a3, 10 * SIZE(AO6)	LFD	a4, 11 * SIZE(AO6)	FMADD	y05, alpha6, a5, y05	FMADD	y06, alpha6, a6, y06	FMADD	y07, alpha6, a7, y07	FMADD	y08, alpha6, a8, y08	LFD	a5, 12 * SIZE(AO6)	LFD	a6, 13 * SIZE(AO6)	LFD	a7, 14 * SIZE(AO6)	LFD	a8, 15 * SIZE(AO6)	FMADD	y09, alpha6, a1, y09	FMADD	y10, alpha6, a2, y10	FMADD	y11, alpha6, a3, y11	FMADD	y12, alpha6, a4, y12	LFD	a1,  0 * SIZE(AO7)	LFD	a2,  1 * SIZE(AO7)	LFD	a3,  2 * SIZE(AO7)	LFD	a4,  3 * SIZE(AO7)	FMADD	y13, alpha6, a5, y13	FMADD	y14, alpha6, a6, y14	FMADD	y15, alpha6, a7, y15	FMADD	y16, alpha6, a8, y16	LFD	a5,  4 * SIZE(AO7)	LFD	a6,  5 * SIZE(AO7)	LFD	a7,  6 * SIZE(AO7)	LFD	a8,  7 * SIZE(AO7)	FMADD	y01, alpha7, a1, y01	FMADD	y02, alpha7, a2, y02	FMADD	y03, alpha7, a3, y03	FMADD	y04, alpha7, a4, y04	LFD	a1,  8 * SIZE(AO7)	LFD	a2,  9 * SIZE(AO7)	LFD	a3, 10 * SIZE(AO7)	LFD	a4, 11 * SIZE(AO7)	FMADD	y05, alpha7, a5, y05	FMADD	y06, alpha7, a6, y06	FMADD	y07, alpha7, a7, y07	FMADD	y08, alpha7, a8, y08	LFD	a5, 12 * SIZE(AO7)	LFD	a6, 13 * SIZE(AO7)	LFD	a7, 14 * SIZE(AO7)	LFD	a8, 15 * SIZE(AO7)	FMADD	y09, alpha7, a1, y09	FMADD	y10, alpha7, a2, y10	FMADD	y11, alpha7, a3, y11	FMADD	y12, alpha7, a4, y12	LFD	a1,  0 * SIZE(AO8)	LFD	a2,  1 * SIZE(AO8)	LFD	a3,  2 * SIZE(AO8)	LFD	a4,  3 * SIZE(AO8)	FMADD	y13, alpha7, a5, y13	FMADD	y14, alpha7, a6, y14	FMADD	y15, alpha7, a7, y15	FMADD	y16, alpha7, a8, y16	LFD	a5,  4 * SIZE(AO8)	LFD	a6,  5 * SIZE(AO8)	LFD	a7,  6 * SIZE(AO8)	LFD	a8,  7 * SIZE(AO8)	FMADD	y01, alpha8, a1, y01	FMADD	y02, alpha8, a2, y02	FMADD	y03, alpha8, a3, y03	FMADD	y04, alpha8, a4, y04	LFD	a1,  8 * SIZE(AO8)	LFD	a2,  9 * SIZE(AO8)	LFD	a3, 10 * SIZE(AO8)	LFD	a4, 11 * SIZE(AO8)	FMADD	y05, alpha8, a5, y05	FMADD	y06, alpha8, a6, y06	FMADD	y07, alpha8, a7, y07	FMADD	y08, alpha8, a8, y08	LFD	a5, 12 * SIZE(AO8)	LFD	a6, 13 * SIZE(AO8)	LFD	a7, 14 * SIZE(AO8)	LFD	a8, 15 * SIZE(AO8)	addi	AO5, AO5, 16 * SIZE	addi	AO6, AO6, 16 * SIZE	addi	AO7, AO7, 16 * SIZE	addi	AO8, AO8, 16 * SIZE	PREFETCH_A5	PREFETCH_A6	PREFETCH_A7	PREFETCH_A8	FMADD	y09, alpha8, a1, y09	FMADD	y10, alpha8, a2, y10	FMADD	y11, alpha8, a3, y11	FMADD	y12, alpha8, a4, y12	LFD	a1,  0 * SIZE(AO1)	LFD	a2,  1 * SIZE(AO1)	LFD	a3,  2 * SIZE(AO1)	LFD	a4,  3 * SIZE(AO1)	FMADD	y13, alpha8, a5, y13	FMADD	y14, alpha8, a6, y14	FMADD	y15, alpha8, a7, y15	FMADD	y16, alpha8, a8, y16	LFD	a5,  4 * SIZE(AO1)	LFD	a6,  5 * SIZE(AO1)	LFD	a7,  6 * SIZE(AO1)	LFD	a8,  7 * SIZE(AO1)	STFD	y01, 16 * SIZE(Y1)	STFD	y02, 17 * SIZE(Y1)	STFD	y03, 18 * SIZE(Y1)	STFD	y04, 19 * SIZE(Y1)	LFD	y01, 32 * SIZE(Y1)	LFD	y02, 33 * SIZE(Y1)	LFD	y03, 34 * SIZE(Y1)	LFD	y04, 35 * SIZE(Y1)	PREFETCH_Y	addi	Y1, Y1, 16 * SIZE	bdnz	LL(12)	.align 4LL(13):	STFD	y05,  4 * SIZE(Y1)	STFD	y06,  5 * SIZE(Y1)	STFD	y07,  6 * SIZE(Y1)	STFD	y08,  7 * SIZE(Y1)	STFD	y09,  8 * SIZE(Y1)	STFD	y10,  9 * SIZE(Y1)	STFD	y11, 10 * SIZE(Y1)	STFD	y12, 11 * SIZE(Y1)	STFD	y13, 12 * SIZE(Y1)	STFD	y14, 13 * SIZE(Y1)	STFD	y15, 14 * SIZE(Y1)	STFD	y16, 15 * SIZE(Y1)	addi	Y1, Y1, 16 * SIZE	.align 4LL(15):	andi.	r0, M, 15	ble	LL(19)	andi.	r0, M, 8	ble	LL(16)	LFD	y01, 0 * SIZE(Y1)	LFD	y02, 1 * SIZE(Y1)	LFD	y03, 2 * SIZE(Y1)	LFD	y04, 3 * SIZE(Y1)	LFD	a1,  0 * SIZE(AO1)	LFD	a2,  1 * SIZE(AO1)	LFD	a3,  2 * SIZE(AO1)	LFD	a4,  3 * SIZE(AO1)	LFD	y05, 4 * SIZE(Y1)	LFD	y06, 5 * SIZE(Y1)	LFD	y07, 6 * SIZE(Y1)	LFD	y08, 7 * SIZE(Y1)	LFD	a5,  4 * SIZE(AO1)	LFD	a6,  5 * SIZE(AO1)	LFD	a7,  6 * SIZE(AO1)	LFD	a8,  7 * SIZE(AO1)	FMADD	y01, alpha1, a1, y01	LFD	a1,  0 * SIZE(AO2)	FMADD	y02, alpha1, a2, y02	LFD	a2,  1 * SIZE(AO2)	FMADD	y03, alpha1, a3, y03	LFD	a3,  2 * SIZE(AO2)	FMADD	y04, alpha1, a4, y04	LFD	a4,  3 * SIZE(AO2)	FMADD	y05, alpha1, a5, y05	LFD	a5,  4 * SIZE(AO2)	FMADD	y06, alpha1, a6, y06	LFD	a6,  5 * SIZE(AO2)	FMADD	y07, alpha1, a7, y07	LFD	a7,  6 * SIZE(AO2)	FMADD	y08, alpha1, a8, y08	LFD	a8,  7 * SIZE(AO2)	FMADD	y01, alpha2, a1, y01	LFD	a1,  0 * SIZE(AO3)	FMADD	y02, alpha2, a2, y02	LFD	a2,  1 * SIZE(AO3)	FMADD	y03, alpha2, a3, y03	LFD	a3,  2 * SIZE(AO3)	FMADD	y04, alpha2, a4, y04	LFD	a4,  3 * SIZE(AO3)	FMADD	y05, alpha2, a5, y05	LFD	a5,  4 * SIZE(AO3)	FMADD	y06, alpha2, a6, y06	LFD	a6,  5 * SIZE(AO3)	FMADD	y07, alpha2, a7, y07	LFD	a7,  6 * SIZE(AO3)	FMADD	y08, alpha2, a8, y08	LFD	a8,  7 * SIZE(AO3)	FMADD	y01, alpha3, a1, y01	LFD	a1,  0 * SIZE(AO4)	FMADD	y02, alpha3, a2, y02	LFD	a2,  1 * SIZE(AO4)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -