⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trsm_kernel_hummer_ln.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 5 页
字号:
	LFPDUX	B5,  BO, INC2	LFPDUX	A6,  AO, INC2	LFPDUX	B6,  BO, INC2	LFPDUX	A7,  AO, INC2	LFPDUX	A9,  BO, INC2	LFPDUX	A8,  AO, INC2	LFPDUX	A10, BO, INC2	bdz-	.L73	.align 4.L72:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f1,  B1, A1, f1	LFPDUX	A1,  AO, INC2	LFPDUX	B1,  BO, INC2	fxcpmadd	f2,  B2, A2, f2	fxcsmadd	f3,  B2, A2, f3	LFPDUX	A2,  AO, INC2	LFPDUX	B2,  BO, INC2	fxcpmadd	f0,  B3, A3, f0	fxcsmadd	f1,  B3, A3, f1	LFPDUX	A3,  AO, INC2	LFPDUX	B3,  BO, INC2	fxcpmadd	f2,  B4, A4, f2	fxcsmadd	f3,  B4, A4, f3	LFPDUX	A4,  AO, INC2	LFPDUX	B4,  BO, INC2	fxcpmadd	f0,  B5, A5, f0	fxcsmadd	f1,  B5, A5, f1	LFPDUX	A5,  AO, INC2	LFPDUX	B5,  BO, INC2	fxcpmadd	f2,  B6, A6, f2	fxcsmadd	f3,  B6, A6, f3	LFPDUX	A6,  AO, INC2	LFPDUX	B6,  BO, INC2	fxcpmadd	f0,  A9,  A7, f0	fxcsmadd	f1,  A9,  A7, f1	LFPDUX	A7,  AO, INC2	LFPDUX	A9,  BO, INC2	fxcpmadd	f2,  A10, A8, f2	fxcsmadd	f3,  A10, A8, f3	LFPDUX	A8,  AO, INC2	LFPDUX	A10, BO, INC2	bdnz+	.L72	.align 4.L73:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f1,  B1, A1, f1	fxcpmadd	f2,  B2, A2, f2	fxcsmadd	f3,  B2, A2, f3	fxcpmadd	f0,  B3, A3, f0	fxcsmadd	f1,  B3, A3, f1	fxcpmadd	f2,  B4, A4, f2	fxcsmadd	f3,  B4, A4, f3	fxcpmadd	f0,  B5, A5, f0	fxcsmadd	f1,  B5, A5, f1	fxcpmadd	f2,  B6, A6, f2	fxcsmadd	f3,  B6, A6, f3	fxcpmadd	f0,  A9,  A7, f0	fxcsmadd	f1,  A9,  A7, f1	fxcpmadd	f2,  A10, A8, f2	fxcsmadd	f3,  A10, A8, f3	.align 4.L74:#if defined(LT) || defined(RN)	andi.	r0,  KK,  7	mtspr	CTR, r0	ble+	.L78#else	andi.	r0, TEMP, 7	mtspr	CTR, r0	ble+	.L78#endif	LFPDUX	A1,  AO,  INC2	LFPDUX	B1,  BO,  INC2	bdz-	.L77	.align 4.L76:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f1,  B1, A1, f1	LFPDUX	A1,  AO,  INC2	LFPDUX	B1,  BO,  INC2	bdnz+	.L76	.align 4.L77:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f1,  B1, A1, f1	.align 4.L78:	fpadd	f0, f0, f2	fpadd	f1, f1, f3#if defined(LN) || defined(RT)#ifdef LN	subi	r0, KK, 2#else	subi	r0, KK, 2#endif	slwi	TEMP, r0, 1 + BASE_SHIFT	slwi	r0,   r0, 1 + BASE_SHIFT	add	AO, AORIG, TEMP	add	BO,  B,     r0	addi	BO,  BO, - 2 * SIZE#endif#if defined(LN) || defined(LT)	fpmr	f24, f0	fsmfp	f0,  f1	fsmtp	f1,  f24	LFPDUX	f16, BO,  INC2	LFPDUX	f17, BO,  INC2	subi	BO,  BO,   4 * SIZE	fpsub	f0,  f16,  f0	fpsub	f1,  f17,  f1#else	LFPDUX	f16, AO,  INC2	LFPDUX	f17, AO,  INC2	subi	AO,  AO,   4 * SIZE	fpsub	f0,  f16,  f0	fpsub	f1,  f17,  f1#endif#ifdef LN	LFPDUX	A1,  AO,  INC2	LFPDUX	A2,  AO,  INC2	addi	AO,  AO,  -4 * SIZE	fxsmul	   f1,  A2, f1	fxcpnmsub  f0,  A2, f1,  f0	fxpmul	   f0,  A1, f0#endif#ifdef LT	LFPDUX	A1,  AO,  INC2	LFPDUX	A2,  AO,  INC2	addi	AO,  AO,  -4 * SIZE	fxpmul	   f0,  A1, f0	fxcsnmsub  f1,  A1, f0, f1	fxsmul	   f1,  A2,  f1#endif#ifdef RN	LFPDUX	A1,  BO,  INC2	LFPDUX	A2,  BO,  INC2	subi	BO, BO, 4 * SIZE	fxpmul	     f0,  A1, f0	fxcsnmsub    f1,  A1, f0, f1	fxsmul	     f1,  A2,  f1#endif#ifdef RT	LFPDUX	A2,  BO,  INC2	LFPDUX	A1,  BO,  INC2	subi	BO, BO, 4 * SIZE	fxsmul	     f1,  A1, f1	fxcpnmsub    f0,  A1, f1,  f0	fxpmul	     f0,  A2,  f0#endif#ifdef LN	subi	CO1, CO1, 2 * SIZE	subi	CO2, CO2, 2 * SIZE#endif#if defined(LN) || defined(LT)	STFPDUX	f0,  BO,  INC2	STFPDUX	f1,  BO,  INC2	subi	BO,  BO,   4 * SIZE	STFDUX	f0,  CO1, INC	STFDUX	f1,  CO1, INC	STFSDUX	f0,  CO2, INC	STFSDUX	f1,  CO2, INC#else	STFPDUX	f0,  AO,  INC2	STFPDUX	f1,  AO,  INC2	subi	AO,  AO,   4 * SIZE	STFDUX	f0,  CO1, INC	STFSDUX	f0,  CO1, INC	STFDUX	f1,  CO2, INC	STFSDUX	f1,  CO2, INC#endif#ifdef LN	subi	CO1, CO1, 2 * SIZE	subi	CO2, CO2, 2 * SIZE#endif#ifdef RT	slwi	r0, K, 1 + BASE_SHIFT	add	AORIG, AORIG, r0#endif#if defined(LT) || defined(RN)	sub	TEMP, K, KK	slwi	r0,   TEMP, 1 + BASE_SHIFT	slwi	TEMP, TEMP, 1 + BASE_SHIFT	add	AO, AO, r0	add	BO, BO, TEMP#endif#ifdef LT	addi	KK, KK, 2#endif#ifdef LN	subi	KK, KK, 2#endif	li	r0, FZERO	lfpsx	f0, SP, r0	.align 4.L70:	andi.	I, M,  4	beq	.L80#if defined(LT) || defined(RN)	addi	BO,  B,  - 2 * SIZE 	fpmr	f1,  f0	fpmr	f2,  f0	fpmr	f3,  f0	srawi.	r0,  KK,  2	mtspr	CTR, r0	ble	.L64#else#ifdef LN	slwi	r0,   K,  2 + BASE_SHIFT	sub	AORIG, AORIG, r0#endif	slwi	r0  , KK, 2 + BASE_SHIFT	slwi	TEMP, KK, 1 + BASE_SHIFT	add	AO, AORIG, r0	add	BO, B,     TEMP	sub	TEMP, K, KK 	fpmr	f1,  f0	addi	BO,  BO,  - 2 * SIZE	fpmr	f2,  f0	fpmr	f3,  f0	srawi.	r0,  TEMP,  2	mtspr	CTR, r0	ble	.L64#endif	LFPDUX	B1,  BO, INC2	LFPDUX	A1,  AO, INC2	LFPDUX	A2,  AO, INC2	LFPDUX	B2,  BO, INC2	LFPDUX	A3,  AO, INC2	LFPDUX	A4,  AO, INC2	LFPDUX	B3,  BO, INC2	LFPDUX	A5,  AO, INC2	LFPDUX	A6,  AO, INC2	LFPDUX	B4,  BO, INC2	LFPDUX	A7,  AO, INC2	LFPDUX	A8,  AO, INC2	bdz-	.L63	.align 4.L62:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f2,  B1, A1, f2	LFPDUX	A1,  AO, INC2	fxcpmadd	f1,  B1, A2, f1	fxcsmadd	f3,  B1, A2, f3	LFPDUX	A2,  AO, INC2	LFPDUX	B1,  BO, INC2	fxcpmadd	f0,  B2, A3, f0	fxcsmadd	f2,  B2, A3, f2	LFPDUX	A3,  AO, INC2	fxcpmadd	f1,  B2, A4, f1	fxcsmadd	f3,  B2, A4, f3	LFPDUX	A4,  AO, INC2	LFPDUX	B2,  BO, INC2	fxcpmadd	f0,  B3, A5, f0	fxcsmadd	f2,  B3, A5, f2	LFPDUX	A5,  AO, INC2	fxcpmadd	f1,  B3, A6, f1	fxcsmadd	f3,  B3, A6, f3	LFPDUX	A6,  AO, INC2	LFPDUX	B3,  BO, INC2	fxcpmadd	f0,  B4, A7, f0	fxcsmadd	f2,  B4, A7, f2	LFPDUX	A7,  AO, INC2	fxcpmadd	f1,  B4, A8, f1	fxcsmadd	f3,  B4, A8, f3	LFPDUX	A8,  AO, INC2	LFPDUX	B4,  BO, INC2	bdnz+	.L62	.align 4.L63:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f2,  B1, A1, f2	fxcpmadd	f1,  B1, A2, f1	fxcsmadd	f3,  B1, A2, f3	fxcpmadd	f0,  B2, A3, f0	fxcsmadd	f2,  B2, A3, f2	fxcpmadd	f1,  B2, A4, f1	fxcsmadd	f3,  B2, A4, f3	fxcpmadd	f0,  B3, A5, f0	fxcsmadd	f2,  B3, A5, f2	fxcpmadd	f1,  B3, A6, f1	fxcsmadd	f3,  B3, A6, f3	fxcpmadd	f0,  B4, A7, f0	fxcsmadd	f2,  B4, A7, f2	fxcpmadd	f1,  B4, A8, f1	fxcsmadd	f3,  B4, A8, f3	.align 4.L64:#if defined(LT) || defined(RN)	andi.	r0,  KK,  3	mtspr	CTR, r0	ble+	.L68#else	andi.	r0, TEMP, 3	mtspr	CTR, r0	ble+	.L68#endif	LFPDUX	A1,  AO,  INC2	LFPDUX	B1,  BO,  INC2	LFPDUX	A2,  AO,  INC2	bdz-	.L67	.align 4.L66:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f2,  B1, A1, f2	LFPDUX	A1,  AO,  INC2	fxcpmadd	f1,  B1, A2, f1	fxcsmadd	f3,  B1, A2, f3	LFPDUX	B1,  BO,  INC2	LFPDUX	A2,  AO,  INC2	bdnz+	.L66	.align 4.L67:	fxcpmadd	f0,  B1, A1, f0	fxcsmadd	f2,  B1, A1, f2	fxcpmadd	f1,  B1, A2, f1	fxcsmadd	f3,  B1, A2, f3	.align 4.L68:#if defined(LN) || defined(RT)#ifdef LN	subi	r0, KK, 4#else	subi	r0, KK, 2#endif	slwi	TEMP, r0, 2 + BASE_SHIFT	slwi	r0,   r0, 1 + BASE_SHIFT	add	AO, AORIG, TEMP	add	BO,  B,     r0	addi	BO,  BO, - 2 * SIZE#endif#if defined(LN) || defined(LT)	fpmr	f24, f0	fpmr	f25, f1	fsmfp	f0,  f2	fsmfp	f1,  f3	fsmtp	f2,  f24	fsmtp	f3,  f25	LFPDUX	f16, BO,  INC2	LFPDUX	f17, BO,  INC2	LFPDUX	f18, BO,  INC2	LFPDUX	f19, BO,  INC2	subi	BO,  BO,   8 * SIZE	fpsub	f0,  f16,  f0	fpsub	f2,  f17,  f2	fpsub	f1,  f18,  f1	fpsub	f3,  f19,  f3#else	LFPDUX	f16, AO,  INC2	LFPDUX	f17, AO,  INC2	LFPDUX	f18, AO,  INC2	LFPDUX	f19, AO,  INC2	subi	AO,  AO,   8 * SIZE	fpsub	f0,  f16,  f0	fpsub	f1,  f17,  f1	fpsub	f2,  f18,  f2	fpsub	f3,  f19,  f3#endif#ifdef LN	addi	AO,  AO,  18 * SIZE	LFPDUX	A1,  AO,  INCM2	LFPDUX	A2,  AO,  INCM2	LFPDUX	A3,  AO,  INCM2	LFPDUX	A4,  AO,  INCM2	add	AO,  AO,  INCM2	LFPDUX	A5,  AO,  INCM2	add	AO,  AO,  INCM2	LFPDUX	A6,  AO,  INCM2	subi	AO,  AO,  2 * SIZE	fxsmul	   f3,  A1, f3	fxcpnmsub  f1,  A1, f3,  f1	fxcsnmsub  f2,  A2, f3,  f2	fxcpnmsub  f0,  A2, f3,  f0	fxpmul	   f1,  A3, f1	fxcsnmsub  f2,  A4, f1,  f2	fxcpnmsub  f0,  A4, f1,  f0	fxsmul	   f2,  A5, f2	fxcpnmsub  f0,  A5, f2,  f0	fxpmul	   f0,  A6, f0#endif#ifdef LT	LFPDUX	A1,  AO,  INC2	LFPDUX	A2,  AO,  INC2	LFPDUX	A3,  AO,  INC2	LFPDUX	A4,  AO,  INC2	add	AO,  AO,  INC2	LFPDUX	A5,  AO,  INC2	add	AO,  AO,  INC2	LFPDUX	A6,  AO,  INC2	subi	AO,  AO,  16 * SIZE	fxpmul	   f0,  A1,  f0	fxcsnmsub  f2,  A1, f0, f2	fxcpnmsub  f1,  A2, f0, f1	fxcsnmsub  f3,  A2, f0, f3	fxsmul	   f2,  A3,  f2	fxcpnmsub  f1,  A4,  f2,  f1	fxcsnmsub  f3,  A4,  f2,  f3	fxpmul	   f1,  A5,  f1	fxcsnmsub  f3,  A5,  f1, f3	fxsmul	   f3,  A6,  f3#endif#ifdef RN	LFPDUX	A1,  BO,  INC2	LFPDUX	A2,  BO,  INC2	subi	BO, BO, 4 * SIZE	fxpmul	f0,  A1,  f0	fxpmul	f1,  A1,  f1	fxcsnmsub    f2,  A1, f0, f2	fxcsnmsub    f3,  A1, f1, f3	fxsmul	f2,  A2,  f2	fxsmul	f3,  A2,  f3#endif#ifdef RT	LFPDUX	A2,  BO,  INC2	LFPDUX	A1,  BO,  INC2	subi	BO, BO, 4 * SIZE	fxsmul	f2,  A1,  f2	fxsmul	f3,  A1,  f3	fxcpnmsub    f0,  A1, f2,  f0	fxcpnmsub    f1,  A1, f3,  f1	fxpmul	f0,  A2,  f0	fxpmul	f1,  A2,  f1#endif#ifdef LN	subi	CO1, CO1, 4 * SIZE	subi	CO2, CO2, 4 * SIZE#endif#if defined(LN) || defined(LT)	STFPDUX	f0,  BO,  INC2	STFPDUX	f2,  BO,  INC2	STFPDUX	f1,  BO,  INC2	STFPDUX	f3,  BO,  INC2	subi	BO,  BO,   8 * SIZE	STFDUX	f0,  CO1, INC	STFDUX	f2,  CO1, INC	STFDUX	f1,  CO1, INC	STFDUX	f3,  CO1, INC	STFSDUX	f0,  CO2, INC	STFSDUX	f2,  CO2, INC	STFSDUX	f1,  CO2, INC	STFSDUX	f3,  CO2, INC#else	STFPDUX	f0,  AO,  INC2	STFPDUX	f1,  AO,  INC2	STFPDUX	f2,  AO,  INC2	STFPDUX	f3,  AO,  INC2	subi	AO,  AO,   8 * SIZE	STFDUX	f0,  CO1, INC	STFSDUX	f0,  CO1, INC	STFDUX	f1,  CO1, INC	STFSDUX	f1,  CO1, INC	STFDUX	f2,  CO2, INC	STFSDUX	f2,  CO2, INC	STFDUX	f3,  CO2, INC	STFSDUX	f3,  CO2, INC#endif#ifdef LN	subi	CO1, CO1, 4 * SIZE	subi	CO2, CO2, 4 * SIZE#endif#ifdef RT	slwi	r0, K, 2 + BASE_SHIFT	add	AORIG, AORIG, r0#endif#if defined(LT) || defined(RN)	sub	TEMP, K, KK	slwi	r0,   TEMP, 2 + BASE_SHIFT	slwi	TEMP, TEMP, 1 + BASE_SHIFT	add	AO, AO, r0	add	BO, BO, TEMP#endif#ifdef LT	addi	KK, KK, 4#endif#ifdef LN	subi	KK, KK, 4#endif	li	r0, FZERO	lfpsx	f0, SP, r0	.align 4.L80:	srawi.	I, M,  3	ble	.L89	.align 4.L51:#if defined(LT) || defined(RN)	fpmr	f4,  f0	addi	BO,   B,  - 2 * SIZE 	fpmr	f1,  f0	fpmr	f5,  f0	fpmr	f2,  f0	fpmr	f6,  f0	srawi.	r0,  KK,  2	fpmr	f3,  f0	mtspr	CTR, r0	fpmr	f7,  f0	ble	.L54#else#ifdef LN	slwi	r0,   K,  3 + BASE_SHIFT	sub	AORIG, AORIG, r0#endif	slwi	r0  , KK, 3 + BASE_SHIFT	slwi	TEMP, KK, 1 + BASE_SHIFT	add	AO, AORIG, r0	add	BO, B,     TEMP	sub	TEMP, K, KK	fpmr	f4,  f0	addi	BO,  BO,  - 2 * SIZE 	fpmr	f1,  f0	fpmr	f5,  f0	fpmr	f2,  f0	fpmr	f6,  f0	srawi.	r0,  TEMP,  2	fpmr	f3,  f0	mtspr	CTR, r0	fpmr	f7,  f0	ble	.L54#endif	LFPDUX	B1,  BO,  INC2	LFPDUX	A1,  AO,  INC2	LFPDUX	A2,  AO,  INC2	LFPDUX	B2,  BO,  INC2	LFPDUX	A3,  AO,  INC2	LFPDUX	A4,  AO,  INC2	LFPDUX	B3,  BO,  INC2	LFPDUX	A5,  AO,  INC2	LFPDUX	A6,  AO,  INC2	LFPDUX	A7,  AO,  INC2	LFPDUX	A8,  AO,  INC2	bdz-	.L53	.align 4.L52:	fxcpmadd	f0,  B1, A1, f0	LFPDUX	B4,  BO,  INC2	fxcsmadd	f4,  B1, A1, f4	LFPDUX	A1,  AO,  INC2	fxcpmadd	f1,  B1, A2, f1	nop	fxcsmadd	f5,  B1, A2, f5	LFPDUX	A2,  AO,  INC2	fxcpmadd	f2,  B1, A3, f2	nop	fxcsmadd	f6,  B1, A3, f6	LFPDUX	A3,  AO,  INC2	fxcpmadd	f3,  B1, A4, f3	nop	fxcsmadd	f7,  B1, A4, f7	LFPDUX	A4,  AO,  INC2	fxcpmadd	f0,  B2, A5, f0	LFPDUX	B1,  BO,  INC2	fxcsmadd	f4,  B2, A5, f4	LFPDUX	A5,  AO,  INC2	fxcpmadd	f1,  B2, A6, f1	nop	fxcsmadd	f5,  B2, A6, f5	LFPDUX	A6,  AO,  INC2	fxcpmadd	f2,  B2, A7, f2	nop	fxcsmadd	f6,  B2, A7, f6	LFPDUX	A7,  AO,  INC2	fxcpmadd	f3,  B2, A8, f3	nop	fxcsmadd	f7,  B2, A8, f7	LFPDUX	A8,  AO,  INC2	fxcpmadd	f0,  B3, A1, f0	LFPDUX	B2,  BO,  INC2	fxcsmadd	f4,  B3, A1, f4	LFPDUX	A1,  AO,  INC2	fxcpmadd	f1,  B3, A2, f1	nop	fxcsmadd	f5,  B3, A2, f5	LFPDUX	A2,  AO,  INC2	fxcpmadd	f2,  B3, A3, f2	nop	fxcsmadd	f6,  B3, A3, f6	LFPDUX	A3,  AO,  INC2	fxcpmadd	f3,  B3, A4, f3	nop	fxcsmadd	f7,  B3, A4, f7	LFPDUX	A4,  AO,  INC2	fxcpmadd	f0,  B4, A5, f0	LFPDUX	B3,  BO,  INC2	fxcsmadd	f4,  B4, A5, f4	LFPDUX	A5,  AO,  INC2	fxcpmadd	f1,  B4, A6, f1	nop	fxcsmadd	f5,  B4, A6, f5	LFPDUX	A6,  AO,  INC2	fxcpmadd	f2,  B4, A7, f2	nop	fxcsmadd	f6,  B4, A7, f6	LFPDUX	A7,  AO,  INC2	fxcpmadd	f3,  B4, A8, f3	nop	fxcsmadd	f7,  B4, A8, f7	LFPDUX	A8,  AO,  INC2	bdnz+	.L52	.align 4.L53:	fxcpmadd	f0,  B1, A1, f0	LFPDUX	B4,  BO,  INC2	fxcsmadd	f4,  B1, A1, f4	LFPDUX	A1,  AO,  INC2	fxcpmadd	f1,  B1, A2, f1	nop	fxcsmadd	f5,  B1, A2, f5	LFPDUX	A2,  AO,  INC2	fxcpmadd	f2,  B1, A3, f2	nop	fxcsmadd	f6,  B1, A3, f6	LFPDUX	A3,  AO,  INC2	fxcpmadd	f3,  B1, A4, f3	nop	fxcsmadd	f7,  B1, A4, f7	LFPDUX	A4,  AO,  INC2	fxcpmadd	f0,  B2, A5, f0	nop	fxcsmadd	f4,  B2, A5, f4	LFPDUX	A5,  AO,  INC2	fxcpmadd	f1,  B2, A6, f1	nop	fxcsmadd	f5,  B2, A6, f5	LFPDUX	A6,  AO,  INC2	fxcpmadd	f2,  B2, A7, f2	nop	fxcsmadd	f6,  B2, A7, f6	LFPDUX	A7,  AO,  INC2	fxcpmadd	f3,  B2, A8,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -