⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gemm_kernel_hummer.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 5 页
字号:
	LFPDUX	A5,  BO,  INC4	fxcpmadd	f1,  A4, A6, f1	LFPDUX	A6,  BO2, INC4	fxcsmadd	f2,  A4, A7, f2	LFPDUX	A7,  BO,  INC4	fxcsmadd	f3,  A4, A8, f3	LFPDUX	A8,  BO2, INC4	LFPDUX	A4, AO2,  INC4	bdnz+	.L42	.align 4.L43:	fxcpmadd	f0,  A1, B1, f0	LFPDUX	B1,  BO,  INC4	fxcpmadd	f1,  A1, B2, f1	LFPDUX	B2,  BO2, INC4	fxcsmadd	f2,  A1, B3, f2	LFPDUX	B3,  BO,  INC4	fxcsmadd	f3,  A1, B4, f3	LFPDUX	B4,  BO2, INC4	fxcpmadd	f0,  A2, A5, f0	LFPDUX	A5,  BO,  INC4	fxcpmadd	f1,  A2, A6, f1	LFPDUX	A6,  BO2, INC4	fxcsmadd	f2,  A2, A7, f2	LFPDUX	A7,  BO,  INC4	fxcsmadd	f3,  A2, A8, f3	LFPDUX	A8,  BO2, INC4	fxcpmadd	f0,  A3, B1, f0	fxcpmadd	f1,  A3, B2, f1	fxcsmadd	f2,  A3, B3, f2	fxcsmadd	f3,  A3, B4, f3	fxcpmadd	f0,  A4, A5, f0	fxcpmadd	f1,  A4, A6, f1	fxcsmadd	f2,  A4, A7, f2	fxcsmadd	f3,  A4, A8, f3	.align 4.L44:	lfd	AP,  ALPHA(SP)#ifdef TRMMKERNEL       fsmfp	AP, AP#endif#if defined(TRMMKERNEL)#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))	sub	TEMP, K, KK#elif defined(LEFT)	addi	TEMP, KK, 1#else	addi	TEMP, KK, 4#endif	andi.	TEMP,  TEMP,  7	mtspr	CTR, TEMP#else	andi.	r0,  K,  7	mtspr	CTR, r0#endif	ble+	.L48	LFDX	A1,  AO,  INC4	LFPDUX	B1,  BO,  INC4	LFPDUX	B2,  BO2, INC4	add	AO, AO, INC	bdz-	.L47	.align 4.L46:	fxcpmadd	f0,  A1, B1, f0	LFPDUX	B1,  BO,  INC4	fxcpmadd	f1,  A1, B2, f1	LFDX	A1,  AO,  INC4	LFPDUX	B2,  BO2, INC4	add	AO, AO, INC	bdnz+	.L46	.align 4.L47:	fxcpmadd	f0,  A1, B1, f0	fxcpmadd	f1,  A1, B2, f1	.align 4.L48:#ifndef TRMMKERNEL	LFDX	A1, CO1, INC2	LFDX	A2, CO2, INC2	LFDX	A3, CO3, INC2	LFDX	A4, CO4, INC2	fpadd	f0, f0, f2	fpadd	f1, f1, f3	fsmfp	A1, A2	fsmfp	A3, A4		fxcpmadd	f0,  AP, f0,  A1	fxcpmadd	f1,  AP, f1,  A3#else	fpadd	f0, f0, f2	fpadd	f1, f1, f3	fpmul	f0,  AP, f0	fpmul	f1,  AP, f1#endif	STFDX	f0,  CO1, INC2	STFSDX	f0,  CO2, INC2	STFDX	f1,  CO3, INC2	STFSDX	f1,  CO4, INC2#ifdef TRMMKERNEL#if ( defined(LEFT) &&  defined(TRANSA)) || \    (!defined(LEFT) && !defined(TRANSA))	sub	TEMP, K, KK#ifdef LEFT	addi	TEMP, TEMP, -1#else	addi	TEMP, TEMP, -4#endif	slwi	r0,   TEMP, 0 + BASE_SHIFT	slwi	TEMP, TEMP, 2 + BASE_SHIFT	add	AO, AO, r0	add	BO, BO, TEMP#endif#ifdef LEFT	addi	KK, KK, 1#endif#endif	.align 4.L49:#if defined(TRMMKERNEL) && !defined(LEFT)	addi	KK, KK, 4#endif	addi	B,  BO, 4 * SIZE	addic.	J, J, -1	bgt+	.L10	.align 4.L50:	andi.	J, N,  2	beq	.L90	mr	CO1, C	add	CO2, C,   LDC	add	C,   CO2, LDC#if defined(TRMMKERNEL) &&  defined(LEFT)	mr	KK, OFFSET#endif	addi	AO, A, -2 * SIZE		li	r0, FZERO	lfpsx	f0, SP, r0	srawi.	I, M,  3	ble	.L60	.align 4.L51:#if defined(TRMMKERNEL)#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))	fpmr	f4,  f0	addi	BO,  B,  - 2 * SIZE 	fpmr	f1,  f0	fpmr	f5,  f0	fpmr	f2,  f0	fpmr	f6,  f0#else	slwi	TEMP, KK, 3 + BASE_SHIFT	slwi	r0,   KK, 1 + BASE_SHIFT	add	AO, AO, TEMP	add	BO, B,  r0	fpmr	f4,  f0	addi	BO,  BO,  - 2 * SIZE 	fpmr	f1,  f0	fpmr	f5,  f0	fpmr	f2,  f0	fpmr	f6,  f0#endif#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))	sub	TEMP, K, KK#elif defined(LEFT)	addi	TEMP, KK, 8#else	addi	TEMP, KK, 2#endif	srawi.	r0,  TEMP,  2	fpmr	f3,  f0	mtspr	CTR, r0	fpmr	f7,  f0	ble	.L54#else	fpmr	f4,  f0	addi	BO,  B,  - 2 * SIZE 	fpmr	f1,  f0	fpmr	f5,  f0	fpmr	f2,  f0	fpmr	f6,  f0	srawi.	r0,  K,  2	fpmr	f3,  f0	mtspr	CTR, r0	fpmr	f7,  f0	ble	.L54#endif	LFPDUX	B1,  BO,  INC2	LFPDUX	A1,  AO,  INC2	LFPDUX	A2,  AO,  INC2	LFPDUX	B2,  BO,  INC2	LFPDUX	A3,  AO,  INC2	LFPDUX	A4,  AO,  INC2	LFPDUX	B3,  BO,  INC2	LFPDUX	A5,  AO,  INC2	LFPDUX	A6,  AO,  INC2	LFPDUX	A7,  AO,  INC2	LFPDUX	A8,  AO,  INC2	bdz-	.L53	.align 4.L52:	fxcpmadd	f0,  B1, A1, f0	LFPDUX	B4,  BO,  INC2	fxcsmadd	f4,  B1, A1, f4	LFPDUX	A1,  AO,  INC2	fxcpmadd	f1,  B1, A2, f1	nop	fxcsmadd	f5,  B1, A2, f5	LFPDUX	A2,  AO,  INC2	fxcpmadd	f2,  B1, A3, f2	nop	fxcsmadd	f6,  B1, A3, f6	LFPDUX	A3,  AO,  INC2	fxcpmadd	f3,  B1, A4, f3	nop	fxcsmadd	f7,  B1, A4, f7	LFPDUX	A4,  AO,  INC2	fxcpmadd	f0,  B2, A5, f0	LFPDUX	B1,  BO,  INC2	fxcsmadd	f4,  B2, A5, f4	LFPDUX	A5,  AO,  INC2	fxcpmadd	f1,  B2, A6, f1	nop	fxcsmadd	f5,  B2, A6, f5	LFPDUX	A6,  AO,  INC2	fxcpmadd	f2,  B2, A7, f2	nop	fxcsmadd	f6,  B2, A7, f6	LFPDUX	A7,  AO,  INC2	fxcpmadd	f3,  B2, A8, f3	nop	fxcsmadd	f7,  B2, A8, f7	LFPDUX	A8,  AO,  INC2	fxcpmadd	f0,  B3, A1, f0	LFPDUX	B2,  BO,  INC2	fxcsmadd	f4,  B3, A1, f4	LFPDUX	A1,  AO,  INC2	fxcpmadd	f1,  B3, A2, f1	nop	fxcsmadd	f5,  B3, A2, f5	LFPDUX	A2,  AO,  INC2	fxcpmadd	f2,  B3, A3, f2	nop	fxcsmadd	f6,  B3, A3, f6	LFPDUX	A3,  AO,  INC2	fxcpmadd	f3,  B3, A4, f3	nop	fxcsmadd	f7,  B3, A4, f7	LFPDUX	A4,  AO,  INC2	fxcpmadd	f0,  B4, A5, f0	LFPDUX	B3,  BO,  INC2	fxcsmadd	f4,  B4, A5, f4	LFPDUX	A5,  AO,  INC2	fxcpmadd	f1,  B4, A6, f1	nop	fxcsmadd	f5,  B4, A6, f5	LFPDUX	A6,  AO,  INC2	fxcpmadd	f2,  B4, A7, f2	nop	fxcsmadd	f6,  B4, A7, f6	LFPDUX	A7,  AO,  INC2	fxcpmadd	f3,  B4, A8, f3	nop	fxcsmadd	f7,  B4, A8, f7	LFPDUX	A8,  AO,  INC2	bdnz+	.L52	.align 4.L53:	fxcpmadd	f0,  B1, A1, f0	LFPDUX	B4,  BO,  INC2	fxcsmadd	f4,  B1, A1, f4	LFPDUX	A1,  AO,  INC2	fxcpmadd	f1,  B1, A2, f1	nop	fxcsmadd	f5,  B1, A2, f5	LFPDUX	A2,  AO,  INC2	fxcpmadd	f2,  B1, A3, f2	nop	fxcsmadd	f6,  B1, A3, f6	LFPDUX	A3,  AO,  INC2	fxcpmadd	f3,  B1, A4, f3	nop	fxcsmadd	f7,  B1, A4, f7	LFPDUX	A4,  AO,  INC2	fxcpmadd	f0,  B2, A5, f0	nop	fxcsmadd	f4,  B2, A5, f4	LFPDUX	A5,  AO,  INC2	fxcpmadd	f1,  B2, A6, f1	nop	fxcsmadd	f5,  B2, A6, f5	LFPDUX	A6,  AO,  INC2	fxcpmadd	f2,  B2, A7, f2	nop	fxcsmadd	f6,  B2, A7, f6	LFPDUX	A7,  AO,  INC2	fxcpmadd	f3,  B2, A8, f3	nop	fxcsmadd	f7,  B2, A8, f7	LFPDUX	A8,  AO,  INC2	fxcpmadd	f0,  B3, A1, f0	fxcsmadd	f4,  B3, A1, f4	fxcpmadd	f1,  B3, A2, f1	fxcsmadd	f5,  B3, A2, f5	fxcpmadd	f2,  B3, A3, f2	fxcsmadd	f6,  B3, A3, f6	fxcpmadd	f3,  B3, A4, f3	fxcsmadd	f7,  B3, A4, f7	fxcpmadd	f0,  B4, A5, f0	fxcsmadd	f4,  B4, A5, f4	fxcpmadd	f1,  B4, A6, f1	fxcsmadd	f5,  B4, A6, f5	fxcpmadd	f2,  B4, A7, f2	fxcsmadd	f6,  B4, A7, f6	fxcpmadd	f3,  B4, A8, f3	fxcsmadd	f7,  B4, A8, f7	.align 4.L54:	lfd	AP,  ALPHA(SP)#ifdef TRMMKERNEL       fsmfp	AP, AP#endif#if defined(TRMMKERNEL)#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))	sub	TEMP, K, KK#elif defined(LEFT)	addi	TEMP, KK, 8#else	addi	TEMP, KK, 2#endif	andi.	TEMP,  TEMP,  3	mtspr	CTR, TEMP#else	andi.	r0,  K,  3	mtspr	CTR, r0#endif	ble+	.L58	LFPDUX	A1,  AO,  INC2	LFPDUX	B1,  BO,  INC2	LFPDUX	A2,  AO,  INC2	LFPDUX	A3,  AO,  INC2	LFPDUX	A4,  AO,  INC2	bdz-	.L57	.align 4.L56:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f4,  B1, A1, f4	LFPDUX	A1,  AO,  INC2	fxcpmadd	f1,  B1, A2, f1	fxcsmadd	f5,  B1, A2, f5	LFPDUX	A2,  AO,  INC2	fxcpmadd	f2,  B1, A3, f2	fxcsmadd	f6,  B1, A3, f6	LFPDUX	A3,  AO,  INC2	fxcpmadd	f3,  B1, A4, f3	fxcsmadd	f7,  B1, A4, f7	LFPDUX	A4,  AO,  INC2	LFPDUX	B1,  BO,  INC2	bdnz+	.L56	.align 4.L57:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f4,  B1, A1, f4	fxcpmadd	f1,  B1, A2, f1	fxcsmadd	f5,  B1, A2, f5	fxcpmadd	f2,  B1, A3, f2	fxcsmadd	f6,  B1, A3, f6	fxcpmadd	f3,  B1, A4, f3	fxcsmadd	f7,  B1, A4, f7	.align 4.L58:#ifndef TRMMKERNEL	LFPDUX	A1, CO1, INC2	LFPDUX	B1, CO1, INC2	LFPDUX	A3, CO1, INC2   	LFPDUX	A5, CO1, INC2 	LFPDUX	B3, CO2, INC2	LFPDUX	A6, CO2, INC2	LFPDUX	A7, CO2, INC2	LFPDUX	B2, CO2, INC2	fxcpmadd	f0,  AP, f0,  A1	fxcpmadd	f1,  AP, f1,  B1	fxcpmadd	f2,  AP, f2,  A3	fxcpmadd	f3,  AP, f3,  A5	fxcpmadd	f4,  AP, f4,  B3	fxcpmadd	f5,  AP, f5,  A6	STFPDUX	f0,  CO1, INCM7	fxcpmadd	f6,  AP, f6,  A7	STFPDUX	f1,  CO1, INC2	fxcpmadd	f7,  AP, f7,  B2	STFPDUX	f2,  CO1, INC2	STFPDUX	f3,  CO1, INC2	STFPDUX	f4,  CO2, INCM7	STFPDUX	f5,  CO2, INC2	STFPDUX	f6,  CO2, INC2	STFPDUX	f7,  CO2, INC2#else	fpmul	f0,  AP, f0	fpmul	f1,  AP, f1	fpmul	f2,  AP, f2	fpmul	f3,  AP, f3	fpmul	f4,  AP, f4	fpmul	f5,  AP, f5	STFPDUX	f0,  CO1, INC2	fpmul	f6,  AP, f6	STFPDUX	f1,  CO1, INC2	fpmul	f7,  AP, f7	STFPDUX	f2,  CO1, INC2	STFPDUX	f3,  CO1, INC2	STFPDUX	f4,  CO2, INC2	STFPDUX	f5,  CO2, INC2	STFPDUX	f6,  CO2, INC2	STFPDUX	f7,  CO2, INC2#endif#ifdef TRMMKERNEL#if ( defined(LEFT) &&  defined(TRANSA)) || \    (!defined(LEFT) && !defined(TRANSA))	sub	TEMP, K, KK#ifdef LEFT	addi	TEMP, TEMP, -8#else	addi	TEMP, TEMP, -2#endif	slwi	r0,   TEMP, 3 + BASE_SHIFT	slwi	TEMP, TEMP, 1 + BASE_SHIFT	add	AO, AO, r0	add	BO, BO, TEMP#endif#ifdef LEFT	addi	KK, KK, 8#endif#endif	addic.	I, I, -1	li	r0, FZERO	lfpsx	f0, SP, r0	bgt+	.L51	.align 4.L60:	andi.	I, M,  4	beq	.L70#if defined(TRMMKERNEL)#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))	addi	BO,  B,  - 2 * SIZE 	fpmr	f1,  f0#else	slwi	TEMP, KK, 2 + BASE_SHIFT	slwi	r0,   KK, 1 + BASE_SHIFT	add	AO, AO, TEMP	add	BO, B,  r0	addi	BO,  BO,  - 2 * SIZE 	fpmr	f1,  f0#endif#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))	sub	TEMP, K, KK#elif defined(LEFT)	addi	TEMP, KK, 4#else	addi	TEMP, KK, 2#endif	fpmr	f2,  f0	srawi.	r0,  TEMP,  2	mtspr	CTR, r0	fpmr	f3,  f0	ble	.L64#else	srawi.	r0,  K,  2 	fpmr	f1,  f0	addi	BO,  B,  - 2 * SIZE	fpmr	f2,  f0	mtspr	CTR, r0	fpmr	f3,  f0	ble	.L64#endif	LFPDUX	B1,  BO, INC2	LFPDUX	A1,  AO, INC2	LFPDUX	A2,  AO, INC2	LFPDUX	B2,  BO, INC2	LFPDUX	A3,  AO, INC2	LFPDUX	A4,  AO, INC2	LFPDUX	B3,  BO, INC2	LFPDUX	A5,  AO, INC2	LFPDUX	A6,  AO, INC2	LFPDUX	B4,  BO, INC2	LFPDUX	A7,  AO, INC2	LFPDUX	A8,  AO, INC2	bdz-	.L63	.align 4.L62:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f2,  B1, A1, f2	LFPDUX	A1,  AO, INC2	fxcpmadd	f1,  B1, A2, f1	fxcsmadd	f3,  B1, A2, f3	LFPDUX	A2,  AO, INC2	LFPDUX	B1,  BO, INC2	fxcpmadd	f0,  B2, A3, f0	fxcsmadd	f2,  B2, A3, f2	LFPDUX	A3,  AO, INC2	fxcpmadd	f1,  B2, A4, f1	fxcsmadd	f3,  B2, A4, f3	LFPDUX	A4,  AO, INC2	LFPDUX	B2,  BO, INC2	fxcpmadd	f0,  B3, A5, f0	fxcsmadd	f2,  B3, A5, f2	LFPDUX	A5,  AO, INC2	fxcpmadd	f1,  B3, A6, f1	fxcsmadd	f3,  B3, A6, f3	LFPDUX	A6,  AO, INC2	LFPDUX	B3,  BO, INC2	fxcpmadd	f0,  B4, A7, f0	fxcsmadd	f2,  B4, A7, f2	LFPDUX	A7,  AO, INC2	fxcpmadd	f1,  B4, A8, f1	fxcsmadd	f3,  B4, A8, f3	LFPDUX	A8,  AO, INC2	LFPDUX	B4,  BO, INC2	bdnz+	.L62	.align 4.L63:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f2,  B1, A1, f2	fxcpmadd	f1,  B1, A2, f1	fxcsmadd	f3,  B1, A2, f3	fxcpmadd	f0,  B2, A3, f0	fxcsmadd	f2,  B2, A3, f2	fxcpmadd	f1,  B2, A4, f1	fxcsmadd	f3,  B2, A4, f3	fxcpmadd	f0,  B3, A5, f0	fxcsmadd	f2,  B3, A5, f2	fxcpmadd	f1,  B3, A6, f1	fxcsmadd	f3,  B3, A6, f3	fxcpmadd	f0,  B4, A7, f0	fxcsmadd	f2,  B4, A7, f2	fxcpmadd	f1,  B4, A8, f1	fxcsmadd	f3,  B4, A8, f3	.align 4.L64:	lfd	AP,  ALPHA(SP)#ifdef TRMMKERNEL       fsmfp	AP, AP#endif#if defined(TRMMKERNEL)#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))	sub	TEMP, K, KK#elif defined(LEFT)	addi	TEMP, KK, 4#else	addi	TEMP, KK, 2#endif	andi.	TEMP,  TEMP,  3	mtspr	CTR, TEMP#else	andi.	r0,  K,  3	mtspr	CTR, r0#endif	ble+	.L68	LFPDUX	A1,  AO,  INC2	LFPDUX	B1,  BO,  INC2	LFPDUX	A2,  AO,  INC2	bdz-	.L67	.align 4.L66:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f2,  B1, A1, f2	LFPDUX	A1,  AO,  INC2	fxcpmadd	f1,  B1, A2, f1	fxcsmadd	f3,  B1, A2, f3	LFPDUX	B1,  BO,  INC2	LFPDUX	A2,  AO,  INC2	bdnz+	.L66	.align 4.L67:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f2,  B1, A1, f2	fxcpmadd	f1,  B1, A2, f1	fxcsmadd	f3,  B1, A2, f3	.align 4.L68:#ifndef TRMMKERNEL	LFPDUX	A1, CO1, INC2	LFPDUX	A2, CO1, INC2	LFPDUX	A3, CO2, INC2	LFPDUX	A4, CO2, INC2	fxcpmadd	f0,  AP, f0,  A1	fxcpmadd	f1,  AP, f1,  A2	fxcpmadd	f2,  AP, f2,  A3 	fxcpmadd	f3,  AP, f3,  A4	STFPDUX	f0,  CO1, INCM3	STFPDUX	f1,  CO1, INC2	STFPDUX	f2,  CO2, INCM3	STFPDUX	f3,  CO2, INC2#else	fpmul	f0,  AP, f0	fpmul	f1,  AP, f1	fpmul	f2,  AP, f2 	fpmul	f3,  AP, f3	STFPDUX	f0,  CO1, INC2	STFPDUX	f1,  CO1, INC2	STFPDUX	f2,  CO2, INC2	STFPDUX	f3,  CO2, INC2#endif#ifdef TRMMKERNEL#if ( defined(LEFT) &&  defined(TRANSA)) || \    (!defined(LEFT) && !defined(TRANSA))	sub	TEMP, K, KK#ifdef LEFT	addi	TEMP, TEMP, -4#else	addi	TEMP, TEMP, -2#endif	slwi	r0,   TEMP, 2 + BASE_SHIFT	slwi	TEMP, TEMP, 1 + BASE_SHIFT	add	AO, AO, r0	add	BO, BO, TEMP#endif#ifdef LEFT	addi	KK, KK, 4#endif#endif	li	r0, FZERO	lfpsx	f0, SP, r0	.align 4.L70:	andi.	I, M,  2	beq	.L80#if defined(TRMMKERNEL)#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))	addi	BO,  B,  - 2 * SIZE	fpmr	f1,  f0#else	slwi	TEMP, KK, 1 + BASE_SHIFT	slwi	r0,   KK, 1 + BASE_SHIFT	add	AO, AO, TEMP	add	BO, B,  r0	addi	BO,  BO,  - 2 * SIZE	fpmr	f1,  f0#endif#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))	sub	TEMP, K, KK#elif defined(LEFT)	addi	TEMP, KK, 2#else	addi	TEMP, KK, 2#endif	srawi.	r0,  TEMP,  3	fpmr	f2,  f0	mtspr	CTR, r0	fpmr	f3, f0	ble	.L74#else	addi	BO,  B,  - 2 * SIZE	fpmr	f1,  f0	srawi.	r0,  K,  3	fpmr	f2,  f0	mtspr	CTR, r0	fpmr	f3, f0	ble	.L74#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -