⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zgemm_kernel_2x2.S

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
$L20:	and	M,  1, I	ble	I, $L29	.align 4$L21:	LD	a1,  0 * SIZE(AO)	fclr	c02	LD	a2,  1 * SIZE(AO)	fclr	c06	LD	a3,  2 * SIZE(AO)	fclr	c10	LD	a4,  3 * SIZE(AO)	fclr	c14	LD	b1,  0 * SIZE(B)	lda	L,        -2(K)	LD	b2,  1 * SIZE(B)	lda	AO,  2 * SIZE(AO)	LD	b3,  2 * SIZE(B)	LD	b4,  3 * SIZE(B)	lda	BO,  4 * SIZE(B)	ble	L, $L25	.align	5$L22:	ADD	c09, t1, c09	unop	MUL	a1, b1, t1	unop	ADD	c10, t2, c10	unop	MUL	a2, b1, t2	LD	b1,  0 * SIZE(BO)	ADD	c13, t3, c13	unop	MUL	a1, b2, t3	lda	BO,    8 * SIZE(BO)	ADD	c14, t4, c14	unop	MUL	a2, b2, t4	LD	b2, -7 * SIZE(BO)	ADD	c01, t1, c01	unop	MUL	a1, b3, t1	unop	ADD	c02, t2, c02	unop	MUL	a2, b3, t2	LD	b3, -6 * SIZE(BO)	ADD	c05, t3, c05	unop	MUL	a1, b4, t3	LD	a1,  2 * SIZE(AO)	ADD	c06, t4, c06	MUL	a2, b4, t4	LD	b5, -5 * SIZE(BO)	ADD	c09, t1, c09	unop	MUL	a3, b1, t1	LD	a2,  3 * SIZE(AO)	ADD	c10, t2, c10	unop	MUL	a4, b1, t2	LD	b1, -4 * SIZE(BO)	ADD	c13, t3, c13	unop	MUL	a3, b2, t3	lda	AO,    4 * SIZE(AO)	ADD	c14, t4, c14	MUL	a4, b2, t4	LD	b2, -3 * SIZE(BO)	ADD	c01, t1, c01	lda	L,        -2(L)	MUL	a3, b3, t1	LD	b4, -1 * SIZE(BO)	ADD	c02, t2, c02	unop	MUL	a4, b3, t2	LD	b3, -2 * SIZE(BO)	ADD	c05, t3, c05	unop	MUL	a3, b5, t3	LD	a3,  0 * SIZE(AO)	ADD	c06, t4, c06	MUL	a4, b5, t4	LD	a4,  1 * SIZE(AO)	bgt	L,  $L22	.align 4$L25:	ADD	c09, t1, c09	ldt	alpha_r, ALPHA_R	MUL	a1, b1, t1	blbs	K, $L28	.align 4	ADD	c10, t2, c10	unop	MUL	a2, b1, t2	LD	b1,  0 * SIZE(BO)	ADD	c13, t3, c13	unop	MUL	a1, b2, t3	unop	ADD	c14, t4, c14	unop	MUL	a2, b2, t4	LD	b2,  1 * SIZE(BO)	ADD	c01, t1, c01	unop	MUL	a1, b3, t1	lda	AO,  2 * SIZE(AO)	ADD	c02, t2, c02	unop	MUL	a2, b3, t2	LD	b3,  2 * SIZE(BO)	ADD	c05, t3, c05	unop	MUL	a1, b4, t3	LD	a1, -2 * SIZE(AO)	ADD	c06, t4, c06	unop	MUL	a2, b4, t4	LD	a2, -1 * SIZE(AO)	ADD	c09, t1, c09	LD	b4,  3 * SIZE(BO)	MUL	a1, b1, t1	lda	BO,  4 * SIZE(BO)	.align 4$L28:	ADD	c10, t2, c10	unop	MUL	a2, b1, t2	ldt	alpha_i, ALPHA_I	ADD	c13, t3, c13	unop	MUL	a1, b2, t3	LD	c03, 0 * SIZE(C1)	ADD	c14, t4, c14	unop	MUL	a2, b2, t4	LD	c04, 1 * SIZE(C1)	ADD	
c01, t1, c01	unop	MUL	a1, b3, t1	LD	c11, 0 * SIZE(C2)	ADD	c02, t2, c02	unop	MUL	a2, b3, t2	LD	c12, 1 * SIZE(C2)	ADD	c05, t3, c05	MUL	a1, b4, t3	ADD	c06, t4, c06	MUL	a2, b4, t4	ADD	c09, t1, c09	ADD	c10, t2, c10	ADD	c13, t3, c13	ADD	c14, t4, c14#if   defined(NN) || defined(NT) || defined(TN) || defined(TT) || \      defined(CC) || defined(CR) || defined(RC) || defined(RR)	SUB	c01, c06, c01	ADD	c02, c05, c02	SUB	c09, c14, c09	ADD	c10, c13, c10#else	ADD	c01, c06, c01	SUB	c02, c05, c02	ADD	c09, c14, c09	SUB	c10, c13, c10#endif	MUL	  alpha_r, c01, t1	MUL	  alpha_r, c02, t2	MUL	  alpha_r, c09, t3	MUL	  alpha_r, c10, t4#if   defined(NN) || defined(NT) || defined(TN) || defined(TT) || \      defined(NC) || defined(TC) || defined(NR) || defined(TR)	ADD	  c03,  t1,  c03	MUL	  alpha_i, c02, t1	ADD	  c04,  t2,  c04	MUL	  alpha_i, c01, t2	ADD	  c11,  t3,  c11	MUL	  alpha_i, c10, t3	ADD	  c12,  t4,  c12	MUL	  alpha_i, c09, t4	SUB	  c03,  t1,  c03	ADD	  c04,  t2,  c04	SUB	  c11,  t3,  c11	ADD	  c12,  t4,  c12#else	ADD	  c03,  t1,  c03	MUL	  alpha_i, c02, t1	SUB	  c04,  t2,  c04	MUL	  alpha_i, c01, t2	ADD	  c11,  t3,  c11	MUL	  alpha_i, c10, t3	SUB	  c12,  t4,  c12	MUL	  alpha_i, c09, t4	ADD	  c03,  t1,  c03	ADD	  c04,  t2,  c04	ADD	  c11,  t3,  c11	ADD	  c12,  t4,  c12#endif	ST	c03,  0 * SIZE(C1)	ST	c04,  1 * SIZE(C1)	ST	c11,  0 * SIZE(C2) 	ST	c12,  1 * SIZE(C2)	.align 4$L29: 	mov	BO, B	unop	unop	bgt	J, $L01	.align 4$L30:	and	N, 1, J	mov	C,  C1	mov	A, AO	ble	J, $L999	.align 4$L40:	sra	M,  1, I	fclr	t1	fclr	t2	fclr	t3	fclr	t4	fclr	c01	fclr	c05 	fclr	c02	fclr	c06	ble	I, $L50	.align 4$L41:	LD	a1,  0 * SIZE(AO)	fclr	c03	LD	a2,  1 * SIZE(AO)	fclr	c07	LD	a3,  2 * SIZE(AO)	fclr	c04	LD	a4,  3 * SIZE(AO)	fclr	c08	LD	b1,  0 * SIZE(B)	LD	b2,  1 * SIZE(B)	LD	b3,  2 * SIZE(B)	LD	b4,  3 * SIZE(B)	lda	L,        -2(K)	lda	BO,  2 * SIZE(B)	lda	AO,  4 * SIZE(AO)	ble	L, $L45	.align	5$L42:	ADD	c05, t1, c05	unop	MUL	a1, b1, t1	unop	ADD	c06, t2, c06	lda	L,   -2(L)	MUL	a2, b1, t2	unop	ADD	c07, t3, c07	
unop	MUL	a3, b1, t3	unop	ADD	c08, t4, c08	unop	MUL	a4, b1, t4	LD	b1,  2 * SIZE(BO)	ADD	c01, t1, c01	unop	MUL	a1, b2, t1	LD	a1,  0 * SIZE(AO)	ADD	c02, t2, c02	lda	BO,  4 * SIZE(BO)	MUL	a2, b2, t2	LD	a2,  1 * SIZE(AO)	ADD	c03, t3, c03	unop	MUL	a3, b2, t3	LD	a3,  2 * SIZE(AO)	ADD	c04, t4, c04	unop	MUL	a4, b2, t4	LD	a5,  3 * SIZE(AO)	ADD	c05, t1, c05	unop	MUL	a1, b3, t1	LD	b2, -1 * SIZE(BO)	ADD	c06, t2, c06	unop	MUL	a2, b3, t2	unop	ADD	c07, t3, c07	unop	MUL	a3, b3, t3	lda	AO,  8 * SIZE(AO)	ADD	c08, t4, c08	unop	MUL	a5, b3, t4	LD	b3,  0 * SIZE(BO)	ADD	c01, t1, c01	unop	MUL	a1, b4, t1	LD	a1, -4 * SIZE(AO)	ADD	c02, t2, c02	unop	MUL	a2, b4, t2	LD	a2, -3 * SIZE(AO)	ADD	c03, t3, c03	LD	a4, -1 * SIZE(AO)	MUL	a3, b4, t3	LD	a3, -2 * SIZE(AO)	ADD	c04, t4, c04	MUL	a5, b4, t4	LD	b4,  1 * SIZE(BO)	bgt	L,  $L42	.align 4$L45:	ADD	c05, t1, c05	ldt	alpha_r, ALPHA_R	MUL	b1, a1, t1	blbs	K, $L48	.align 4	ADD	c06, t2, c06	MUL	a2, b1, t2	ADD	c07, t3, c07	MUL	a3, b1, t3	ADD	c08, t4, c08	unop	MUL	a4, b1, t4	LD	b1,  0 * SIZE(BO)	ADD	c01, t1, c01	unop	MUL	a1, b2, t1	LD	a1,  0 * SIZE(AO)	ADD	c02, t2, c02	unop	MUL	a2, b2, t2	LD	a2,  1 * SIZE(AO)	ADD	c03, t3, c03	unop	MUL	a3, b2, t3	LD	a3,  2 * SIZE(AO)	ADD	c04, t4, c04	MUL	a4, b2, t4	LD	a4,  3 * SIZE(AO)	lda	AO,  4 * SIZE(AO)	ADD	c05, t1, c05	LD	b2,  1 * SIZE(BO)	MUL	a1, b1, t1	lda	BO,  2 * SIZE(BO)	.align 4$L48:	ADD	c06, t2, c06	unop	MUL	a2, b1, t2	ldt	alpha_i, ALPHA_I	ADD	c07, t3, c07	lda	I,        -1(I)	MUL	a3, b1, t3	LD	c09, 0 * SIZE(C1)	ADD	c08, t4, c08	unop	MUL	a4, b1, t4	LD	c10, 1 * SIZE(C1)	ADD	c01, t1, c01	unop	MUL	a1, b2, t1	LD	c11, 2 * SIZE(C1)	ADD	c02, t2, c02	unop	MUL	a2, b2, t2	LD	c12, 3 * SIZE(C1)	ADD	c03, t3, c03	MUL	a3, b2, t3	ADD	c04, t4, c04	MUL	a4, b2, t4	ADD	c05, t1, c05	ADD	c06, t2, c06	ADD	c07, t3, c07	ADD	c08, t4, c08#if   defined(NN) || defined(NT) || defined(TN) || defined(TT) || \      defined(CC) || defined(CR) || defined(RC) || defined(RR)	SUB	c01, c06, c01	ADD	c02, c05, c02	SUB	c03, c08, c03	ADD	c04, c07, c04#else	
ADD	c01, c06, c01	SUB	c02, c05, c02	ADD	c03, c08, c03	SUB	c04, c07, c04#endif	MUL	  alpha_r, c01, t1	MUL	  alpha_r, c02, t2	MUL	  alpha_r, c03, t3	MUL	  alpha_r, c04, t4#if   defined(NN) || defined(NT) || defined(TN) || defined(TT) || \      defined(NC) || defined(TC) || defined(NR) || defined(TR)	ADD	  c09,  t1,  c09	MUL	  alpha_i, c02, t1	ADD	  c10,  t2,  c10	MUL	  alpha_i, c01, t2	ADD	  c11,  t3,  c11	MUL	  alpha_i, c04, t3	ADD	  c12,  t4,  c12	MUL	  alpha_i, c03, t4	SUB	  c09,  t1,  c09	fclr	t1	ADD	  c10,  t2,  c10	fclr	t2	SUB	  c11,  t3,  c11	fclr	t3	ADD	  c12,  t4,  c12	fclr	t4#else	ADD	  c09,  t1,  c09	MUL	  alpha_i, c02, t1	SUB	  c10,  t2,  c10	MUL	  alpha_i, c01, t2	ADD	  c11,  t3,  c11	MUL	  alpha_i, c04, t3	SUB	  c12,  t4,  c12	MUL	  alpha_i, c03, t4	ADD	  c09,  t1,  c09	fclr	t1	ADD	  c10,  t2,  c10	fclr	t2	ADD	  c11,  t3,  c11	fclr	t3	ADD	  c12,  t4,  c12	fclr	t4#endif	ST	c09,  0 * SIZE(C1)	fclr	c01	ST	c10,  1 * SIZE(C1)	fclr	c02	ST	c11,  2 * SIZE(C1)	unop 	fclr	c05	unop	ST	c12,  3 * SIZE(C1)	fclr	c06	lda	C1,   4 * SIZE(C1)	bgt	I, $L41	.align 4$L50:	and	M,  1, I	ble	I, $L999	.align 4$L51:	LD	a1,  0 * SIZE(AO)	LD	a2,  1 * SIZE(AO)	LD	a3,  2 * SIZE(AO)	LD	a4,  3 * SIZE(AO)	LD	b1,  0 * SIZE(B)	lda	L,        -2(K)	LD	b2,  1 * SIZE(B)	lda	AO,  2 * SIZE(AO)	LD	b3,  2 * SIZE(B)	LD	b4,  3 * SIZE(B)	lda	BO,  2 * SIZE(B)	ble	L, $L55	.align	5$L52:	ADD	c01, t1, c01	unop	MUL	a1, b1, t1	unop	ADD	c02, t2, c02	lda	AO,    4 * SIZE(AO)	MUL	a2, b1, t2	LD	b1,  2 * SIZE(BO)	ADD	c05, t3, c05	lda	L,        -2(L)	MUL	a1, b2, t3	LD	a1, -2 * SIZE(AO)	ADD	c06, t4, c06	unop	MUL	a2, b2, t4	LD	a2, -1 * SIZE(AO)	ADD	c01, t1, c01	LD	b2,  3 * SIZE(BO)	MUL	a3, b3, t1	lda	BO,    4 * SIZE(BO)	ADD	c02, t2, c02	unop	MUL	a4, b3, t2	LD	b3,  0 * SIZE(BO)	ADD	c05, t3, c05	unop	MUL	a3, b4, t3	LD	a3,  0 * SIZE(AO)	ADD	c06, t4, c06	MUL	a4, b4, t4	LD	b4,  1 * SIZE(BO)	unop	LD	a4,  1 * SIZE(AO)	unop	unop	bgt	L,  $L52	.align 4$L55:	ADD	c01, t1, c01	ldt	alpha_r, ALPHA_R	MUL	a1, b1, t1	blbs	K, $L58	
.align 4	ADD	c02, t2, c02	unop	MUL	a2, b1, t2	LD	b1,  0 * SIZE(BO)	ADD	c05, t3, c05	lda	BO,  2 * SIZE(BO)	MUL	a1, b2, t3	LD	a1,  0 * SIZE(AO)	ADD	c06, t4, c06	unop	MUL	a2, b2, t4	LD	a2,  1 * SIZE(AO)	ADD	c01, t1, c01	LD	b2, -1 * SIZE(BO)	MUL	a1, b1, t1	lda	AO,  2 * SIZE(AO)	.align 4$L58:	ADD	c02, t2, c02	unop	MUL	a2, b1, t2	ldt	alpha_i, ALPHA_I	ADD	c05, t3, c05	unop	MUL	a1, b2, t3	LD	c03, 0 * SIZE(C1)	ADD	c06, t4, c06	unop	MUL	a2, b2, t4	LD	c04, 1 * SIZE(C1)	ADD	c01, t1, c01	ADD	c02, t2, c02	ADD	c05, t3, c05	ADD	c06, t4, c06#if   defined(NN) || defined(NT) || defined(TN) || defined(TT) || \      defined(CC) || defined(CR) || defined(RC) || defined(RR)	SUB	c01, c06, c01	ADD	c02, c05, c02#else	ADD	c01, c06, c01	SUB	c02, c05, c02#endif	MUL	  alpha_r, c01, t1	MUL	  alpha_r, c02, t2	MUL	  alpha_i, c02, t3	MUL	  alpha_i, c01, t4#if   defined(NN) || defined(NT) || defined(TN) || defined(TT) || \      defined(NC) || defined(TC) || defined(NR) || defined(TR)	ADD	  c03,  t1,  c03	ADD	  c04,  t2,  c04	SUB	  c03,  t3,  c03	ADD	  c04,  t4,  c04#else	ADD	  c03,  t1,  c03	SUB	  c04,  t2,  c04	ADD	  c03,  t3,  c03	ADD	  c04,  t4,  c04#endif	ST	c03,  0 * SIZE(C1)	ST	c04,  1 * SIZE(C1)	.align 4$L999:	ldt	$f2,   0($sp)	ldt	$f3,   8($sp)	ldt	$f4,  16($sp)	ldt	$f5,  24($sp)	ldt	$f6,  32($sp)	ldt	$f7,  40($sp)	ldt	$f8,  48($sp)	ldt	$f9,  56($sp)	clr	$0	lda	$sp, STACKSIZE($sp)	ret	.ident	VERSION	.end	CNAME

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -