⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ztrsm_kernel_hummer_ln.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 4 页
字号:
#if defined(LN) || defined(LT)	LFPDUX	f16, BO,  INC4	LFPDUX	f20, BO2, INC4	LFPDUX	f17, BO,  INC4	LFPDUX	f21, BO2, INC4	LFPDUX	f18, BO,  INC4	LFPDUX	f22, BO2, INC4	LFPDUX	f19, BO,  INC4	LFPDUX	f23, BO2, INC4	subi	BO,  BO,  16 * SIZE	subi	BO2, BO2, 16 * SIZE#else	LFPDUX	f16, AO,  INC4	LFPDUX	f17, AO2, INC4	LFPDUX	f18, AO,  INC4	LFPDUX	f19, AO2, INC4	LFPDUX	f20, AO,  INC4	LFPDUX	f21, AO2, INC4	LFPDUX	f22, AO,  INC4	LFPDUX	f23, AO2, INC4	subi	AO,  AO,  16 * SIZE	subi	AO2, AO2, 16 * SIZE#endif	fpsub	f0,  f16,  f0	fpsub	f1,  f17,  f1	fpsub	f2,  f18,  f2	fpsub	f3,  f19,  f3	fpsub	f8,  f20,  f8	fpsub	f9,  f21,  f9	fpsub	f10, f22,  f10	fpsub	f11, f23,  f11#ifdef LN	LFPDUX	A1,  AO,  INC4	add	AO2, AO2, INC4	add	AO,  AO,  INC4	add	AO2, AO2, INC4	LFPDUX	A2,  AO,  INC4	LFPDUX	A3,  AO2, INC4	add	AO,  AO,  INC4	add	AO2, AO2, INC4	LFPDUX	A4,  AO,  INC4	LFPDUX	A5,  AO2, INC4	LFPDUX	A6,  AO,  INC4	add	AO2, AO2, INC4	LFPDUX	A7,  AO,  INC4	LFPDUX	A8,  AO2, INC4	LFPDUX	A9,  AO,  INC4	LFPDUX	A10, AO2, INC4	subi	AO,  AO,  32 * SIZE	subi	AO2, AO2, 32 * SIZE	fxpmul	  f4,  A10, f3	fxpmul	  f5,  A10, f11	FXCXNPMA  f3,  A10, f3,  f4	FXCXNPMA  f11, A10, f11, f5	fxcpnmsub f2,  A9, f3,  f2	fxcpnmsub f10, A9, f11, f10	FXCXNSMA  f2,  A9, f3,  f2	FXCXNSMA  f10, A9, f11, f10	fxcpnmsub f1,  A8, f3,  f1	fxcpnmsub f9,  A8, f11, f9	FXCXNSMA  f1,  A8, f3,  f1	FXCXNSMA  f9,  A8, f11, f9	fxcpnmsub f0,  A7, f3,  f0	fxcpnmsub f8,  A7, f11, f8	FXCXNSMA  f0,  A7, f3,  f0	FXCXNSMA  f8,  A7, f11, f8	fxpmul	  f4,  A6, f2	fxpmul	  f5,  A6, f10	FXCXNPMA  f2,  A6, f2,  f4	FXCXNPMA  f10, A6, f10, f5	fxcpnmsub f1,  A5, f2,  f1	fxcpnmsub f9,  A5, f10, f9	FXCXNSMA  f1,  A5, f2,  f1	FXCXNSMA  f9,  A5, f10, f9	fxcpnmsub f0,  A4, f2,  f0	fxcpnmsub f8,  A4, f10, f8	FXCXNSMA  f0,  A4, f2,  f0	FXCXNSMA  f8,  A4, f10, f8	fxpmul	  f4,  A3, f1	fxpmul	  f5,  A3, f9	FXCXNPMA  f1,  A3, f1,  f4	FXCXNPMA  f9,  A3, f9,  f5	fxcpnmsub f0,  A2, f1,  f0	fxcpnmsub f8,  A2, f9,  f8	FXCXNSMA  f0,  A2, f1,  f0	FXCXNSMA  f8,  A2, f9,  f8	fxpmul	  f4,  A1, f0	fxpmul	  f5,  A1, f8	FXCXNPMA  f0,  A1, f0,  f4	FXCXNPMA  f8,  A1, f8,  f5#endif#ifdef LT	LFPDUX	A1,  AO,  INC4	LFPDUX	A2,  AO2, INC4	LFPDUX	A3,  AO,  INC4	LFPDUX	A4,  AO2, INC4	add	AO,  AO,  INC4	LFPDUX	A5,  AO2, INC4	LFPDUX	A6,  AO,  INC4	LFPDUX	A7,  AO2, INC4	add	AO,  AO,  INC4	add	AO2, AO2, INC4	LFPDUX	A8,  AO,  INC4	LFPDUX	A9,  AO2, INC4	add	AO,  AO,  INC4	add	AO2, AO2, INC4	add	AO,  AO,  INC4	LFPDUX	A10, AO2, INC4	subi	AO,  AO,  32 * SIZE	subi	AO2, AO2, 32 * SIZE	fxpmul	  f4,  A1, f0	fxpmul	  f5,  A1, f8	FXCXNPMA  f0,  A1, f0, f4	FXCXNPMA  f8,  A1, f8, f5	fxcpnmsub f1,  A2, f0, f1	fxcpnmsub f9,  A2, f8, f9	FXCXNSMA  f1,  A2, f0, f1	FXCXNSMA  f9,  A2, f8, f9	fxcpnmsub f2,  A3, f0, f2	fxcpnmsub f10, A3, f8, f10	FXCXNSMA  f2,  A3, f0, f2	FXCXNSMA  f10, A3, f8, f10	fxcpnmsub f3,  A4, f0, f3	fxcpnmsub f11, A4, f8, f11	FXCXNSMA  f3,  A4, f0, f3	FXCXNSMA  f11, A4, f8, f11	fxpmul	  f6,  A5, f1	fxpmul	  f7,  A5, f9	FXCXNPMA  f1,  A5, f1, f6	FXCXNPMA  f9,  A5, f9, f7	fxcpnmsub f2,  A6, f1, f2	fxcpnmsub f10, A6, f9, f10	FXCXNSMA  f2,  A6, f1, f2	FXCXNSMA  f10, A6, f9, f10	fxcpnmsub f3,  A7, f1, f3	fxcpnmsub f11, A7, f9, f11	FXCXNSMA  f3,  A7, f1, f3	FXCXNSMA  f11, A7, f9, f11	fxpmul	  f4,  A8, f2	fxpmul	  f5,  A8, f10	FXCXNPMA  f2,  A8, f2,  f4	FXCXNPMA  f10, A8, f10, f5	fxcpnmsub f3,  A9, f2,  f3	fxcpnmsub f11, A9, f10, f11	FXCXNSMA  f3,  A9, f2,  f3	FXCXNSMA  f11, A9, f10, f11	fxpmul	  f6,  A10, f3	fxpmul	  f7,  A10, f11	FXCXNPMA  f3,  A10, f3,  f6	FXCXNPMA  f11, A10, f11, f7#endif#ifdef RN	LFPDUX	A1,  BO,  INC4	LFPDUX	A2,  BO2, INC4	add	BO,  BO,  INC4	LFPDUX	A3,  BO2, INC4	subi	BO,  BO,   8 * SIZE	subi	BO2, BO2,  8 * SIZE	fxpmul	  f4,  A1, f0	fxpmul	  f5,  A1, f1	fxpmul	  f6,  A1, f2	fxpmul	  f7,  A1, f3	FXCXNPMA  f0,  A1, f0, f4	FXCXNPMA  f1,  A1, f1, f5	FXCXNPMA  f2,  A1, f2, f6	FXCXNPMA  f3,  A1, f3, f7	fxcpnmsub f8,  A2, f0, f8	fxcpnmsub f9,  A2, f1, f9	fxcpnmsub f10, A2, f2, f10	fxcpnmsub f11, A2, f3, f11	FXCXNSMA  f8,  A2, f0, f8	FXCXNSMA  f9,  A2, f1, f9	FXCXNSMA  f10, A2, f2, f10	FXCXNSMA  f11, A2, f3, f11	fxpmul	  f4,  A3, f8	fxpmul	  f5,  A3, f9	fxpmul	  f6,  A3, f10	fxpmul	  f7,  A3, f11	FXCXNPMA  f8,  A3, f8,  f4	FXCXNPMA  f9,  A3, f9,  f5	FXCXNPMA  f10, A3, f10, f6	FXCXNPMA  f11, A3, f11, f7#endif#ifdef RT	LFPDUX	A1,  BO,  INC4	add	BO2, BO2, INC4	LFPDUX	A2,  BO,  INC4	LFPDUX	A3,  BO2, INC4	subi	BO,  BO,   8 * SIZE	subi	BO2, BO2,  8 * SIZE	fxpmul	  f4,  A3, f8	fxpmul	  f5,  A3, f9	fxpmul	  f6,  A3, f10	fxpmul	  f7,  A3, f11	FXCXNPMA  f8,  A3, f8,  f4	FXCXNPMA  f9,  A3, f9,  f5	FXCXNPMA  f10, A3, f10, f6	FXCXNPMA  f11, A3, f11, f7	fxcpnmsub f0,  A2, f8,  f0	fxcpnmsub f1,  A2, f9,  f1	fxcpnmsub f2,  A2, f10, f2	fxcpnmsub f3,  A2, f11, f3	FXCXNSMA  f0,  A2, f8,  f0	FXCXNSMA  f1,  A2, f9,  f1	FXCXNSMA  f2,  A2, f10, f2	FXCXNSMA  f3,  A2, f11, f3	fxpmul	  f4,  A1, f0	fxpmul	  f5,  A1, f1	fxpmul	  f6,  A1, f2	fxpmul	  f7,  A1, f3	FXCXNPMA  f0,  A1, f0,  f4	FXCXNPMA  f1,  A1, f1,  f5	FXCXNPMA  f2,  A1, f2,  f6	FXCXNPMA  f3,  A1, f3,  f7#endif#ifdef LN	subi	CO1, CO1, 8 * SIZE	subi	CO2, CO2, 8 * SIZE#endif#if defined(LN) || defined(LT)	STFPDUX	f0,  BO,  INC4	STFPDUX	f8,  BO2, INC4	STFPDUX	f1,  BO,  INC4	STFPDUX	f9,  BO2, INC4	STFPDUX	f2,  BO,  INC4	STFPDUX	f10, BO2, INC4	STFPDUX	f3,  BO,  INC4	STFPDUX	f11, BO2, INC4	subi	BO,  BO,  16 * SIZE	subi	BO2, BO2, 16 * SIZE#else	STFPDUX	f0,  AO,  INC4	STFPDUX	f1,  AO2, INC4	STFPDUX	f2,  AO,  INC4	STFPDUX	f3,  AO2, INC4	STFPDUX	f8,  AO,  INC4	STFPDUX	f9,  AO2, INC4	STFPDUX	f10, AO,  INC4	STFPDUX	f11, AO2, INC4	subi	AO,  AO,  16 * SIZE	subi	AO2, AO2, 16 * SIZE#endif	STFDUX	f0,  CO1, INC	STFSDUX	f0,  CO1, INC	STFDUX	f1,  CO1, INC	STFSDUX	f1,  CO1, INC	STFDUX	f2,  CO1, INC	STFSDUX	f2,  CO1, INC	STFDUX	f3,  CO1, INC	STFSDUX	f3,  CO1, INC	STFDUX	f8,  CO2, INC	STFSDUX	f8,  CO2, INC	STFDUX	f9,  CO2, INC	STFSDUX	f9,  CO2, INC	STFDUX	f10, CO2, INC	STFSDUX	f10, CO2, INC	STFDUX	f11, CO2, INC	STFSDUX	f11, CO2, INC#ifdef LN	subi	CO1, CO1, 8 * SIZE	subi	CO2, CO2, 8 * SIZE#endif#ifdef RT	slwi	r0, K, 2 + ZBASE_SHIFT	add	AORIG, AORIG, r0#endif#if defined(LT) || defined(RN)	sub	TEMP, K, KK	slwi	r0,   TEMP, 2 + ZBASE_SHIFT	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT	add	AO, AO, r0	add	BO, BO, TEMP#endif#ifdef LT	addi	KK, KK, 4#endif#ifdef LN	subi	KK, KK, 4#endif	addic.	I, I, -1	li	r0, FZERO	lfpsx	f0, SP, r0	bgt+	.L11	.align 4.L49:#ifdef LN	slwi	r0, K, 1 + ZBASE_SHIFT	add	B, B, r0#endif#if defined(LT) || defined(RN)	addi	B,  BO, 4 * SIZE#endif#ifdef RN	addi	KK, KK, 2#endif#ifdef RT	subi	KK, KK, 2#endif	addic.	J, J, -1	bgt+	.L10	.align 4.L50:	andi.	J, N,  1	beq	.L999#ifdef RT	slwi	r0, K, 0 + ZBASE_SHIFT	sub	B, B, r0	sub	C, C, LDC#endif	mr	CO1, C#ifdef LN	add	KK, M, OFFSET#endif#ifdef LT	mr	KK, OFFSET#endif#if defined(LN) || defined(RT)	addi	AORIG, A, -2 * SIZE#else	addi	AO, A, -2 * SIZE#endif#ifndef RT	add	C,  CO2, LDC#endif	li	r0, FZERO	lfpsx	f0, SP, r0	andi.	I, M,  1	beq	.L60#if defined(LT) || defined(RN)	addi	BO,  B,  - 2 * SIZE	fpmr	f1,  f0	fpmr	f2,  f0	fpmr	f3,  f0	srawi.	r0, KK,  3	mtspr	CTR, r0	ble	.L74#else#ifdef LN	slwi	r0,   K,  0 + ZBASE_SHIFT	sub	AORIG, AORIG, r0#endif	slwi	TEMP, KK, 0 + ZBASE_SHIFT	add	AO, AORIG, TEMP	add	BO, B,     TEMP	sub	TEMP, K, KK	addi	BO,  BO,  - 2 * SIZE	fpmr	f1,  f0	fpmr	f2,  f0	fpmr	f3,  f0	srawi.	r0, TEMP,  3	mtspr	CTR, r0	ble	.L74#endif	LFPDUX	A1,  AO, INC2	LFPDUX	B1,  BO, INC2	LFPDUX	A2,  AO, INC2	LFPDUX	B2,  BO, INC2	LFPDUX	A3,  AO, INC2	LFPDUX	B3,  BO, INC2	LFPDUX	A4,  AO, INC2	LFPDUX	B4,  BO, INC2	LFPDUX	A5,  AO, INC2	LFPDUX	B5,  BO, INC2	LFPDUX	A6,  AO, INC2	LFPDUX	B6,  BO, INC2	LFPDUX	A7,  AO, INC2	LFPDUX	A9,  BO, INC2	LFPDUX	A8,  AO, INC2	LFPDUX	A10, BO, INC2	bdz-	.L73	.align 4.L72:	FXCPMADD	f0,  B1, A1, f0	FXCSMADD	f1,  B1, A1, f1	LFPDUX	A1,  AO, INC2	LFPDUX	B1,  BO, INC2	FXCPMADD	f2,  B2, A2, f2	FXCSMADD	f3,  B2, A2, f3	LFPDUX	A2,  AO, INC2	LFPDUX	B2,  BO, INC2	FXCPMADD	f0,  B3, A3, f0	FXCSMADD	f1,  B3, A3, f1	LFPDUX	A3,  AO, INC2	LFPDUX	B3,  BO, INC2	FXCPMADD	f2,  B4, A4, f2	FXCSMADD	f3,  B4, A4, f3	LFPDUX	A4,  AO, INC2	LFPDUX	B4,  BO, INC2	FXCPMADD	f0,  B5, A5, f0	FXCSMADD	f1,  B5, A5, f1	LFPDUX	A5,  AO, INC2	LFPDUX	B5,  BO, INC2	FXCPMADD	f2,  B6, A6, f2	FXCSMADD	f3,  B6, A6, f3	LFPDUX	A6,  AO, INC2	LFPDUX	B6,  BO, INC2	FXCPMADD	f0,  A9,  A7, f0	FXCSMADD	f1,  A9,  A7, f1	LFPDUX	A7,  AO, INC2	LFPDUX	A9,  BO, INC2	FXCPMADD	f2,  A10, A8, f2	FXCSMADD	f3,  A10, A8, f3	LFPDUX	A8,  AO, INC2	LFPDUX	A10, BO, INC2	bdnz+	.L72	.align 4.L73:	FXCPMADD	f0,  B1, A1, f0	FXCSMADD	f1,  B1, A1, f1	FXCPMADD	f2,  B2, A2, f2	FXCSMADD	f3,  B2, A2, f3	FXCPMADD	f0,  B3, A3, f0	FXCSMADD	f1,  B3, A3, f1	FXCPMADD	f2,  B4, A4, f2	FXCSMADD	f3,  B4, A4, f3	FXCPMADD	f0,  B5, A5, f0	FXCSMADD	f1,  B5, A5, f1	FXCPMADD	f2,  B6, A6, f2	FXCSMADD	f3,  B6, A6, f3	FXCPMADD	f0,  A9,  A7, f0	FXCSMADD	f1,  A9,  A7, f1	FXCPMADD	f2,  A10, A8, f2	FXCSMADD	f3,  A10, A8, f3	.align 4.L74:#if defined(LT) || defined(RN)	andi.	r0,  KK,  7	mtspr	CTR, r0	ble+	.L78#else	andi.	r0, TEMP, 7	mtspr	CTR, r0	ble+	.L78#endif	LFPDUX	A1,  AO,  INC2	LFPDUX	B1,  BO,  INC2	bdz-	.L77	.align 4.L76:	FXCPMADD	f0,  B1, A1, f0	FXCSMADD	f1,  B1, A1, f1	LFPDUX	A1,  AO,  INC2	LFPDUX	B1,  BO,  INC2	bdnz+	.L76	.align 4.L77:	FXCPMADD	f0,  B1, A1, f0	FXCSMADD	f1,  B1, A1, f1	.align 4.L78:	fpadd	f0, f0, f2	fpadd	f1, f1, f3	fpadd	f0, f0, f1#if defined(LN) || defined(RT)#ifdef LN	subi	r0, KK, 1#else	subi	r0, KK, 1#endif	slwi	TEMP, r0, 0 + ZBASE_SHIFT	add	AO, AORIG, TEMP	add	BO, B,     TEMP	addi	BO,  BO, - 2 * SIZE#endif#if defined(LN) || defined(LT)	LFPDX	f16, BO,  INC2#else	LFPDX	f16, AO,  INC2#endif	fpsub	f0,  f16,  f0#ifdef LN	LFPDX	A1,  AO,  INC2	fxpmul	  f4,  A1, f0	FXCXNPMA  f0,  A1, f0,  f4#endif#ifdef LT	LFPDX	A1,  AO,  INC2	fxpmul	  f4,  A1, f0	FXCXNPMA  f0,  A1, f0, f4#endif#ifdef RN	LFPDX	A1,  BO,  INC2	fxpmul	  f4,  A1, f0	FXCXNPMA  f0,  A1, f0, f4#endif#ifdef RT	LFPDX	A1,  BO,  INC2	fxpmul	  f4,  A1, f0	FXCXNPMA  f0,  A1, f0,  f4#endif#ifdef LN	subi	CO1, CO1, 2 * SIZE#endif#if defined(LN) || defined(LT)	STFPDX	f0,  BO,  INC2#else	STFPDX	f0,  AO,  INC2#endif	STFDUX	f0,  CO1, INC	STFSDUX	f0,  CO1, INC#ifdef LN	subi	CO1, CO1, 2 * SIZE#endif#ifdef RT	slwi	r0, K, 0 + ZBASE_SHIFT	add	AORIG, AORIG, r0#endif#if defined(LT) || defined(RN)	sub	TEMP, K, KK	slwi	TEMP, TEMP, 0 + ZBASE_SHIFT	add	AO, AO, TEMP	add	BO, BO, TEMP#endif#ifdef LT	addi	KK, KK, 1#endif#ifdef LN	subi	KK, KK, 1#endif	li	r0, FZERO	lfpsx	f0, SP, r0	.align 4.L60:	andi.	I, M,  2	beq	.L70#if defined(LT) || defined(RN) 	fpmr	f1,  f0	addi	BO,  B,  - 2 * SIZE	fpmr	f2,  f0	fpmr	f3,  f0	srawi.	r0,  KK,  2	mtspr	CTR, r0	ble	.L64#else#ifdef LN	slwi	r0,   K,  1 + ZBASE_SHIFT	sub	AORIG, AORIG, r0#endif	slwi	r0  , KK, 1 + ZBASE_SHIFT	slwi	TEMP, KK, 0 + ZBASE_SHIFT	add	AO, AORIG, r0	add	BO, B,     TEMP	sub	TEMP, K, KK 	fpmr	f1,  f0	addi	BO,  BO,  - 2 * SIZE	fpmr	f2,  f0	fpmr	f3,  f0	srawi.	r0,  TEMP,  2	mtspr	CTR, r0	ble	.L64#endif	LFPDUX	B1,  BO, INC2	LFPDUX	A1,  AO, INC2	LFPDUX	A2,  AO, INC2	LFPDUX	B2,  BO, INC2	LFPDUX	A3,  AO, INC2	LFPDUX	A4,  AO, INC2	LFPDUX	B3,  BO, INC2	LFPDUX	A5,  AO, INC2	LFPDUX	A6,  AO, INC2	LFPDUX	B4,  BO, INC2	LFPDUX	A7,  AO, INC2	LFPDUX	A8,  AO, INC2	bdz-	.L63	.align 4.L62:	FXCPMADD	f0,  B1, A1, f0	FXCSMADD	f2,  B1, A1, f2	LFPDUX	A1,  AO, INC2	FXCPMADD	f1,  B1, A2, f1	FXCSMADD	f3,  B1, A2, f3	LFPDUX	A2,  AO, INC2	LFPDUX	B1,  BO, INC2	FXCPMADD	f0,  B2, A3, f0	FXCSMADD	f2,  B2, A3, f2	LFPDUX	A3,  AO, INC2	FXCPMADD	f1,  B2, A4, f1	FXCSMADD	f3,  B2, A4, f3	LFPDUX	A4,  AO, INC2	LFPDUX	B2,  BO, INC2	FXCPMADD	f0,  B3, A5, f0	FXCSMADD	f2,  B3, A5, f2	LFPDUX	A5,  AO, INC2	FXCPMADD	f1,  B3, A6, f1	FXCSMADD	f3,  B3, A6, f3	LFPDUX	A6,  AO, INC2	LFPDUX	B3,  BO, INC2	FXCPMADD	f0,  B4, A7, f0	FXCSMADD	f2,  B4, A7, f2	LFPDUX	A7,  AO, INC2	FXCPMADD	f1,  B4, A8, f1	FXCSMADD	f3,  B4, A8, f3	LFPDUX	A8,  AO, INC2	LFPDUX	B4,  BO, INC2	bdnz+	.L62	.align 4.L63:	FXCPMADD	f0,  B1, A1, f0	FXCSMADD	f2,  B1, A1, f2	FXCPMADD	f1,  B1, A2, f1	FXCSMADD	f3,  B1, A2, f3	FXCPMADD	f0,  B2, A3, f0	FXCSMADD	f2,  B2, A3, f2	FXCPMADD	f1,  B2, A4, f1	FXCSMADD	f3,  B2, A4, f3	FXCPMADD	f0,  B3, A5, f0

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -