⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zgemm_kernel.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
	sub	K, KK, L#elif defined(LEFT)	add	KK, 1, L#else	add	KK, 2, L#endif	sra	L, 2, L	cmp	L,  0	LDF	[AO + 0 * SIZE], a1	FMOV	FZERO, t2	LDF	[BO + 0 * SIZE], b1	FMOV	FZERO, c06	LDF	[AO + 1 * SIZE], a2	FMOV	FZERO, t3	LDF	[BO + 1 * SIZE], b2	FMOV	FZERO, c08	LDF	[AO + 2 * SIZE], a3	FMOV	FZERO, t4	LDF	[BO + 2 * SIZE], b3	FMOV	FZERO, c01	LDF	[AO + 3 * SIZE], a4	FMOV	FZERO, c03	LDF	[BO + 3 * SIZE], b4	FMOV	FZERO, c05#endif	ble,pn	%icc, .LL55	FMOV	FZERO, c07.LL52:	FADD2	c02, t1, c02	add	AO,  8 * SIZE, AO	prefetch [AO + APREFETCHSIZE * SIZE], 0	FMUL	a1, b1, t1	add	BO, 16 * SIZE, BO	FADD4	c04, t2, c04	add	L, -1, L	FMUL	a1, b2, t2	FADD2	c06, t3, c06	cmp	L, 0	FMUL	a1, b3, t3	FADD4	c08, t4, c08	FMUL	a1, b4, t4	LDF	[AO -  4 * SIZE], a1	FADD1	c01, t1, c01	FMUL	a2, b1, t1	LDF	[BO - 12 * SIZE], b1	FADD3	c03, t2, c03	FMUL	a2, b2, t2	LDF	[BO - 11 * SIZE], b2	FADD1	c05, t3, c05	FMUL	a2, b3, t3	LDF	[BO - 10 * SIZE], b3	FADD3	c07, t4, c07	FMUL	a2, b4, t4	LDF	[BO -  9 * SIZE], b4	FADD2	c02, t1, c02	FMUL	a3, b1, t1	LDF	[AO -  3 * SIZE], a2	FADD4	c04, t2, c04	FMUL	a3, b2, t2	FADD2	c06, t3, c06	FMUL	a3, b3, t3	FADD4	c08, t4, c08	FMUL	a3, b4, t4	LDF	[AO -  2 * SIZE], a3	FADD1	c01, t1, c01	FMUL	a4, b1, t1	LDF	[BO -  8 * SIZE], b1	FADD3	c03, t2, c03	FMUL	a4, b2, t2	LDF	[BO -  7 * SIZE], b2	FADD1	c05, t3, c05	FMUL	a4, b3, t3	LDF	[BO -  6 * SIZE], b3	FADD3	c07, t4, c07	FMUL	a4, b4, t4	LDF	[BO -  5 * SIZE], b4	FADD2	c02, t1, c02	FMUL	a1, b1, t1	LDF	[AO -  1 * SIZE], a4	FADD4	c04, t2, c04	FMUL	a1, b2, t2	FADD2	c06, t3, c06	FMUL	a1, b3, t3	FADD4	c08, t4, c08	FMUL	a1, b4, t4	LDF	[AO +  0 * SIZE], a1	FADD1	c01, t1, c01	FMUL	a2, b1, t1	LDF	[BO -  4 * SIZE], b1	FADD3	c03, t2, c03	FMUL	a2, b2, t2	LDF	[BO -  3 * SIZE], b2	FADD1	c05, t3, c05	FMUL	a2, b3, t3	LDF	[BO -  2 * SIZE], b3	FADD3	c07, t4, c07	FMUL	a2, b4, t4	LDF	[BO -  1 * SIZE], b4	FADD2	c02, t1, c02	FMUL	a3, b1, t1	LDF	[AO +  1 * SIZE], a2	FADD4	c04, t2, c04	FMUL	a3, b2, t2	FADD2	c06, t3, c06	FMUL	a3, b3, t3	FADD4	c08, t4, c08	FMUL	a3, b4, t4	LDF	[AO +  2 * SIZE], a3	FADD1	c01, t1, c01	FMUL	a4, b1, t1	LDF	[BO +  0 * SIZE], b1	FADD3	c03, t2, c03	FMUL	a4, b2, t2	LDF	[BO +  1 * SIZE], b2	FADD1	c05, t3, c05	FMUL	a4, b3, t3	LDF	[BO +  2 * SIZE], b3	FADD3	c07, t4, c07	FMUL	a4, b4, t4	LDF	[BO +  3 * SIZE], b4	bg,pt	%icc, .LL52	LDF	[AO +  3 * SIZE], a4.LL55:#ifndef TRMMKERNEL	and	K, 3, L#else#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))	sub	K, KK, L#elif defined(LEFT)	add	KK, 1, L#else	add	KK, 2, L#endif	and	L, 3, L#endif	cmp	L,  0	ble,a,pn %icc, .LL59	nop.LL56:	FADD2	c02, t1, c02	add	AO, 2 * SIZE, AO	FMUL	a1, b1, t1	add	L, -1, L	add	BO, 4 * SIZE, BO	FADD4	c04, t2, c04	cmp	L, 0	FMUL	a1, b2, t2	FADD2	c06, t3, c06	FMUL	a1, b3, t3	FADD4	c08, t4, c08	FMUL	a1, b4, t4	LDF	[AO + 0 * SIZE], a1	FADD1	c01, t1, c01	FMUL	a2, b1, t1	LDF	[BO + 0 * SIZE], b1	FADD3	c03, t2, c03	FMUL	a2, b2, t2	LDF	[BO + 1 * SIZE], b2	FADD1	c05, t3, c05	FMUL	a2, b3, t3	LDF	[BO + 2 * SIZE], b3	FADD3	c07, t4, c07	FMUL	a2, b4, t4	LDF	[BO + 3 * SIZE], b4	bg,pt	%icc, .LL56	LDF	[AO + 1 * SIZE], a2.LL59:#ifndef TRMMKERNEL	FADD2	c02, t1, c02	LDF	[C1 + 0 * SIZE], a1	FADD4	c04, t2, c04	LDF	[C1 + 1 * SIZE], a2	FADD2	c06, t3, c06	LDF	[C2 + 0 * SIZE], a3	FADD4	c08, t4, c08	LDF	[C2 + 1 * SIZE], a4	FADD	c01, c04, c01	FMUL	ALPHA_R, c01, t1	FADD	c02, c03, c02	FMUL	ALPHA_R, c02, t2	FADD	c05, c08, c05	FMUL	ALPHA_R, c05, t3	FADD	c06, c07, c06	FMUL	ALPHA_R, c06, t4	FADD	a1, t1, a1	FMUL	ALPHA_I, c02, t1	FADD	a2, t2, a2	FMUL	ALPHA_I, c01, t2	FADD	a3, t3, a3	FMUL	ALPHA_I, c06, t3	FADD	a4, t4, a4	FMUL	ALPHA_I, c05, t4	FSUB	a1, t1, a1	FADD	a2, t2, a2	FSUB	a3, t3, a3	FADD	a4, t4, a4	STF	a1, [C1 + 0 * SIZE]	FMOV	FZERO, t1	STF	a2, [C1 + 1 * SIZE]	FMOV	FZERO, t2	STF	a3, [C2 + 0 * SIZE]	FMOV	FZERO, t3	STF	a4, [C2 + 1 * SIZE]	FMOV	FZERO, t4#else	FADD2	c02, t1, c02	FADD4	c04, t2, c04	FADD2	c06, t3, c06	FADD4	c08, t4, c08	FADD	c01, c04, c01	FADD	c02, c03, c02	FADD	c05, c08, c05	FADD	c06, c07, c06	STF	c01, [C1 + 0 * SIZE]	FMOV	FZERO, t1	STF	c02, [C1 + 1 * SIZE]	FMOV	FZERO, t2	STF	c05, [C2 + 0 * SIZE]	FMOV	FZERO, t3	STF	c06, [C2 + 1 * SIZE]	FMOV	FZERO, t4#endif	add	C1, 2 * SIZE, C1	add	C2, 2 * SIZE, C2#ifdef TRMMKERNEL#if ( defined(LEFT) &&  defined(TRANSA)) || \    (!defined(LEFT) && !defined(TRANSA))	sub	K, KK, TEMP1#ifdef LEFT	add	TEMP1, -1, TEMP1#else	add	TEMP1, -2, TEMP1#endif	sll	TEMP1, 0 + ZBASE_SHIFT, TEMP2	sll	TEMP1, 1 + ZBASE_SHIFT, TEMP1	add	AO, TEMP2, AO	add	BO, TEMP1, BO#endif#ifdef LEFT	add	KK, 1, KK#endif#endif.LL99:	add	J, -1, J	mov	BO, B	cmp	J, 0	bg,pt	%icc, .LL11#if defined(TRMMKERNEL) && !defined(LEFT)	add	KK, 2, KK#else	nop#endif.LL100:	sra	M, 1, I	and	N, 1, J	cmp	J, 0	ble,pn	%icc, .LL999	mov	A, AO	mov	C, C1	add	C, LDC, C#if defined(TRMMKERNEL) &&  defined(LEFT)	mov	OFFSET, KK#endif	cmp	I, 0	ble,pn	%icc, .LL150	FMOV	FZERO, c03.LL121:#if !defined(TRMMKERNEL)	LDF	[AO + 0 * SIZE], a1	sra	K, 2, L	FMOV	FZERO, t1	LDF	[B  + 0 * SIZE], b1	mov	B, BO	FMOV	FZERO, c07	LDF	[AO + 1 * SIZE], a2	cmp	L,  0	FMOV	FZERO, t2	LDF	[B  + 1 * SIZE], b2	FMOV	FZERO, c04	LDF	[AO + 2 * SIZE], a3	FMOV	FZERO, t3	LDF	[B  + 2 * SIZE], b3	FMOV	FZERO, c08	LDF	[AO + 3 * SIZE], a4	FMOV	FZERO, t4	LDF	[B  + 3 * SIZE], b4	FMOV	FZERO, c01	prefetch [C1 + 3 * SIZE], 3	FMOV	FZERO, c05	FMOV	FZERO, c02#else#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))	mov	B, BO#else	sll	KK, 1 + ZBASE_SHIFT, TEMP1	sll	KK, 0 + ZBASE_SHIFT, TEMP2	add	AO, TEMP1, AO	add	B,  TEMP2, BO#endif#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))	sub	K, KK, L#elif defined(LEFT)	add	KK, 2, L#else	add	KK, 1, L#endif	sra	L, 2, L	cmp	L,  0	LDF	[AO + 0 * SIZE], a1	FMOV	FZERO, t1	LDF	[BO + 0 * SIZE], b1	FMOV	FZERO, c07	LDF	[AO + 1 * SIZE], a2	FMOV	FZERO, t2	LDF	[BO + 1 * SIZE], b2	FMOV	FZERO, c04	LDF	[AO + 2 * SIZE], a3	FMOV	FZERO, t3	LDF	[BO + 2 * SIZE], b3	FMOV	FZERO, c08	LDF	[AO + 3 * SIZE], a4	FMOV	FZERO, t4	LDF	[BO + 3 * SIZE], b4	FMOV	FZERO, c01	prefetch [C1 + 3 * SIZE], 3	FMOV	FZERO, c05	FMOV	FZERO, c02#endif	ble,pn	%icc, .LL125	FMOV	FZERO, c06.LL122:	FADD1	c03, t1, c03	add	L, -1, L	FMUL	a1, b1, t1	prefetch [AO + APREFETCHSIZE * SIZE], 0	FADD3	c07, t2, c07	add	BO,  8 * SIZE, BO	FMUL	a1, b2, t2	LDF	[AO + 4 * SIZE], a1	FADD2	c04, t3, c04	add	AO, 16 * SIZE, AO	FMUL	a2, b1, t3	cmp	L,  0	FADD4	c08, t4, c08	nop	FMUL	a2, b2, t4	LDF	[AO - 11 * SIZE], a2	FADD1	c01, t1, c01	nop	FMUL	a3, b1, t1	nop	FADD3	c05, t2, c05	nop	FMUL	a3, b2, t2	LDF	[AO - 10 * SIZE], a3	FADD2	c02, t3, c02	nop	FMUL	a4, b1, t3	LDF	[BO -  4 * SIZE], b1	FADD4	c06, t4, c06	nop	FMUL	a4, b2, t4	LDF	[BO -  3 * SIZE], b2	FADD1	c03, t1, c03	nop	FMUL	a1, b3, t1	LDF	[AO -  9 * SIZE], a4	FADD3	c07, t2, c07	nop	FMUL	a1, b4, t2	LDF	[AO -  8 * SIZE], a1	FADD2	c04, t3, c04	nop	FMUL	a2, b3, t3	nop	FADD4	c08, t4, c08	nop	FMUL	a2, b4, t4	LDF	[AO -  7 * SIZE], a2	FADD1	c01, t1, c01	nop	FMUL	a3, b3, t1	nop	FADD3	c05, t2, c05	nop	FMUL	a3, b4, t2	LDF	[AO -  6 * SIZE], a3	FADD2	c02, t3, c02	nop	FMUL	a4, b3, t3	LDF	[BO -  2 * SIZE], b3	FADD4	c06, t4, c06	nop	FMUL	a4, b4, t4	LDF	[BO -  1 * SIZE], b4	FADD1	c03, t1, c03	nop	FMUL	a1, b1, t1	LDF	[AO -  5 * SIZE], a4	FADD3	c07, t2, c07	nop	FMUL	a1, b2, t2	LDF	[AO -  4 * SIZE], a1	FADD2	c04, t3, c04	nop	FMUL	a2, b1, t3	nop	FADD4	c08, t4, c08	nop	FMUL	a2, b2, t4	LDF	[AO -  3 * SIZE], a2	FADD1	c01, t1, c01	nop	FMUL	a3, b1, t1	nop	FADD3	c05, t2, c05	nop	FMUL	a3, b2, t2	LDF	[AO -  2 * SIZE], a3	FADD2	c02, t3, c02	nop	FMUL	a4, b1, t3	LDF	[BO +  0 * SIZE], b1	FADD4	c06, t4, c06	nop	FMUL	a4, b2, t4	LDF	[BO +  1 * SIZE], b2	FADD1	c03, t1, c03	nop	FMUL	a1, b3, t1	LDF	[AO -  1 * SIZE], a4	FADD3	c07, t2, c07	nop	FMUL	a1, b4, t2	LDF	[AO +  0 * SIZE], a1	FADD2	c04, t3, c04	nop	FMUL	a2, b3, t3	nop	FADD4	c08, t4, c08	nop	FMUL	a2, b4, t4	LDF	[AO +  1 * SIZE], a2	FADD1	c01, t1, c01	nop	FMUL	a3, b3, t1	nop	FADD3	c05, t2, c05	nop	FMUL	a3, b4, t2	LDF	[AO +  2 * SIZE], a3	FADD2	c02, t3, c02	nop	FMUL	a4, b3, t3	LDF	[BO +  2 * SIZE], b3	FADD4	c06, t4, c06	FMUL	a4, b4, t4	LDF	[AO +  3 * SIZE], a4	bg,pt	%icc, .LL122	LDF	[BO +  3 * SIZE], b4.LL125:#ifndef TRMMKERNEL	and	K, 3, L#else#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))	sub	K, KK, L#elif defined(LEFT)	add	KK, 2, L#else	add	KK, 1, L#endif	and	L, 3, L#endif	cmp	L,  0	ble,a,pn %icc, .LL129	nop.LL126:	FADD1	c03, t1, c03	add	AO, 4 * SIZE, AO	FMUL	a1, b1, t1	add	BO, 2 * SIZE, BO	FADD3	c07, t2, c07	add	L, -1, L	FMUL	a1, b2, t2	LDF	[AO + 0 * SIZE], a1	FADD2	c04, t3, c04	cmp	L, 0	FMUL	a2, b1, t3	FADD4	c08, t4, c08	FMUL	a2, b2, t4	LDF	[AO + 1 * SIZE], a2	FADD1	c01, t1, c01	FMUL	a3, b1, t1	FADD3	c05, t2, c05	FMUL	a3, b2, t2	LDF	[AO + 2 * SIZE], a3	FADD2	c02, t3, c02	FMUL	a4, b1, t3	LDF	[BO + 0 * SIZE], b1	FADD4	c06, t4, c06	FMUL	a4, b2, t4	LDF	[BO + 1 * SIZE], b2	bg,pt	%icc, .LL126	LDF	[AO + 3 * SIZE], a4.LL129:#ifndef TRMMKERNEL	FADD1	c03, t1, c03	LDF	[C1 + 0 * SIZE], a1	FADD3	c07, t2, c07	LDF	[C1 + 1 * SIZE], a2	FADD2	c04, t3, c04	LDF	[C1 + 2 * SIZE], a3	FADD4	c08, t4, c08	LDF	[C1 + 3 * SIZE], a4	FADD	c01, c06, c01	FMUL	ALPHA_R, c01, t1	FADD	c02, c05, c02	FMUL	ALPHA_R, c02, t2	FADD	c03, c08, c03	FMUL	ALPHA_R, c03, t3	FADD	c04, c07, c04	FMUL	ALPHA_R, c04, t4	FADD	a1, t1, a1	FMUL	ALPHA_I, c02, t1	FADD	a2, t2, a2	FMUL	ALPHA_I, c01, t2	FADD	a3, t3, a3	FMUL	ALPHA_I, c04, t3	FADD	a4, t4, a4	FMUL	ALPHA_I, c03, t4	FSUB	a1, t1, a1	FADD	a2, t2, a2	FSUB	a3, t3, a3	FADD	a4, t4, a4	STF	a1, [C1 + 0 * SIZE]	FMOV	FZERO, t1	STF	a2, [C1 + 1 * SIZE]	FMOV	FZERO, t2	STF	a3, [C1 + 2 * SIZE]	FMOV	FZERO, t3	STF	a4, [C1 + 3 * SIZE]	FMOV	FZERO, t4#else	FADD1	c03, t1, c03	FADD3	c07, t2, c07	FADD2	c04, t3, c04	FADD4	c08, t4, c08	FADD	c01, c06, c01	FADD	c02, c05, c02	FADD	c03, c08, c03	FADD	c04, c07, c04	STF	c01, [C1 + 0 * SIZE]	FMOV	FZERO, t1	STF	c02, [C1 + 1 * SIZE]	FMOV	FZERO, t2	STF	c03, [C1 + 2 * SIZE]	FMOV	FZERO, t3	STF	c04, [C1 + 3 * SIZE]	FMOV	FZERO, t4#endif	add	C1, 4 * SIZE, C1#ifdef TRMMKERNEL#if ( defined(LEFT) &&  defined(TRANSA)) || \    (!defined(LEFT) && !defined(TRANSA))	sub	K, KK, TEMP1#ifdef LEFT	add	TEMP1, -2, TEMP1#else	add	TEMP1, -1, TEMP1#endif	sll	TEMP1, 1 + ZBASE_SHIFT, TEMP2	sll	TEMP1, 0 + ZBASE_SHIFT, TEMP1	add	AO, TEMP2, AO	add	BO, TEMP1, BO#endif#ifdef LEFT	add	KK, 2, KK#endif#endif	add	I, -1, I	cmp	I, 0	bg,pt	%icc, .LL121	FMOV	FZERO, c03.LL150:	and	M, 1, I	cmp	I, 0	ble,pn	%icc, .LL999	nop#if !defined(TRMMKERNEL)	LDF	[AO + 0 * SIZE], a1	sra	K, 2, L	FMOV	FZERO, c01	LDF	[B  + 0 * SIZE], b1	mov	B, BO	FMOV	FZERO, t1	LDF	[AO + 1 * SIZE], a2	cmp	L,  0	FMOV	FZERO, c02	LDF	[B  + 1 * SIZE], b2	FMOV	FZERO, t2	LDF	[AO + 2 * SIZE], a3	FMOV	FZERO, c03	LDF	[B  + 2 * SIZE], b3	FMOV	FZERO, t3	LDF	[AO + 3 * SIZE], a4	FMOV	FZERO, c04	LDF	[B  + 3 * SIZE], b4	FMOV	FZERO, t4#else#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))	mov	B, BO#else	sll	KK, 0 + ZBASE_SHIFT, TEMP1	sll	KK, 0 + ZBASE_SHIFT, TEMP2	add	AO, TEMP1, AO	add	B,  TEMP2, BO#endif#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))	sub	K, KK, L#elif defined(LEFT)	add	KK, 1, L#else	add	KK, 1, L#endif	sra	L, 2, L	cmp	L,  0	LDF	[AO + 0 * SIZE], a1	FMOV	FZERO, c01	LDF	[BO + 0 * SIZE], b1	FMOV	FZERO, t1	LDF	[AO + 1 * SIZE], a2	FMOV	FZERO, c02	LDF	[BO  + 1 * SIZE], b2	FMOV	FZERO, t2	LDF	[AO + 2 * SIZE], a3	FMOV	FZERO, c03	LDF	[BO  + 2 * SIZE], b3	FMOV	FZERO, t3	LDF	[AO + 3 * SIZE], a4	FMOV	FZERO, c04	LDF	[BO  + 3 * SIZE], b4	FMOV	FZERO, t4#endif	ble,pn	%icc, .LL155	nop.LL152:	FADD1	c01, t1, c01	add	L, -1, L	FMUL	a1, b1, t1	prefetch [AO + APREFETCHSIZE * SIZE], 0	FADD3	c02, t2, c02	add	BO,  8 * SIZE, BO	FMUL	a1, b2, t2	LDF	[AO + 4 * SIZE], a1	FADD2	c03, t3, c03	cmp	L, 0	FMUL	a2, b1, t3	LDF	[BO - 4 * SIZE], b1	FADD4	c04, t4, c04	nop	FMUL	a2, b2, t4	LDF	[AO + 5 * SIZE], a2	FADD1	c01, t1, c01	nop	FMUL	a3, b3, t1	LDF	[BO - 3 * SIZE], b2	FADD3	c02, t2, c02	nop	FMUL	a3, b4, t2	LDF	[AO + 6 * SIZE], a3	FADD2	c03, t3, c03	nop	FMUL	a4, b3, t3	LDF	[BO - 2 * SIZE], b3	FADD4	c04, t4, c04	nop	FMUL	a4, b4, t4	LDF	[AO + 7 * SIZE], a4	FADD1	c01, t1, c01	nop	FMUL	a1, b1, t1	LDF	[BO - 1 * SIZE], b4	FADD3	c02, t2, c02	FMUL	a1, b2, t2	LDF	[AO +  8 * SIZE], a1	FADD2	c03, t3, c03	FMUL	a2, b1, t3	LDF	[BO +  0 * SIZE], b1	FADD4	c04, t4, c04	FMUL	a2, b2, t4	LDF	[AO +  9 * SIZE], a2	FADD1	c01, t1, c01	FMUL	a3, b3, t1	LDF	[BO +  1 * SIZE], b2	FADD3	c02, t2, c02	FMUL	a3, b4, t2	LDF	[AO + 10 * SIZE], a3	FADD2	c03, t3, c03	FMUL	a4, b3, t3	LDF	[BO +  2 * SIZE], b3	FADD4	c04, t4, c04	FMUL	a4, b4, t4	LDF	[AO + 11 * SIZE], a4	add	AO,  8 * SIZE, AO	bg,pt	%icc, .LL152	LDF	[BO +  3 * SIZE], b4.LL155:#ifndef TRMMKERNEL	and	K, 3, L#else#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))	sub	K, KK, L#elif defined(LEFT)	add	KK, 1, L#else	add	KK, 1, L#endif	and	L, 3, L#endif	cmp	L,  0	ble,a,pn %icc, .LL159	nop.LL156:	FADD1	c01, t1, c01	add	AO, 2 * SIZE, AO	FMUL	a1, b1, t1	add	BO, 2 * SIZE, BO	FADD3	c02, t2, c02	add	L, -1, L	FMUL	a1, b2, t2	LDF	[AO + 0 * SIZE], a1	FADD2	c03, t3, c03	FMUL	a2, b1, t3	LDF	[BO + 0 * SIZE], b1	cmp	L, 0	FADD4	c04, t4, c04	FMUL	a2, b2, t4	LDF	[BO + 1 * SIZE], b2	bg,pt	%icc, .LL156	LDF	[AO + 1 * SIZE], a2.LL159:#ifndef TRMMKERNEL	FADD1	c01, t1, c01	FADD3	c02, t2, c02	FADD2	c03, t3, c03	FADD4	c04, t4, c04	LDF	[C1 + 0 * SIZE], a1	LDF	[C1 + 1 * SIZE], a2	FADD	c01, c04, c01	FADD	c02, c03, c02	FMUL	ALPHA_R, c01, t1	FMUL	ALPHA_R, c02, t2	FMUL	ALPHA_I, c02, t3	FMUL	ALPHA_I, c01, t4	FADD	a1, t1, a1	FADD	a2, t2, a2	FSUB	a1, t3, a1	FADD	a2, t4, a2	STF	a1, [C1 + 0 * SIZE]	STF	a2, [C1 + 1 * SIZE]#else	FADD1	c01, t1, c01	FADD3	c02, t2, c02	FADD2	c03, t3, c03	FADD4	c04, t4, c04	FADD	c01, c04, c01	FADD	c02, c03, c02	STF	c01, [C1 + 0 * SIZE]	STF	c02, [C1 + 1 * SIZE]#endif	add	C1, 2 * SIZE, C1#ifndef TRMMKERNEL#if ( defined(LEFT) &&  defined(TRANSA)) || \    (!defined(LEFT) && !defined(TRANSA))	sub	K, KK, TEMP1#ifdef LEFT	add	TEMP1, -1, TEMP1#else	add	TEMP1, -1, TEMP1#endif	sll	TEMP1, 0 + ZBASE_SHIFT, TEMP2	sll	TEMP1, 0 + ZBASE_SHIFT, TEMP1	add	AO, TEMP2, AO	add	BO, TEMP1, BO#endif#ifdef LEFT	add	KK, 1, KK#endif#endif.LL999:	return	%i7 + 8	clr	%o0	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -