⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trsm_kernel_hummer_ln.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 5 页
字号:
	/*
	 * NOTE(review): this excerpt opens partway through a conditional
	 * section of the kernel; the matching #if/#ifdef and the loads of the
	 * A1..A8 values used first are above the visible text.
	 *
	 * Upper-case mnemonics (LFPDUX, STFPDUX, STFDUX, STFSDUX, LFD, ...) and
	 * the symbols SIZE, BASE_SHIFT, INC, INC2, INC4, INCM4, FZERO, as well
	 * as the register aliases (AO, AO2, BO, BO2, CO1..CO4, KK, TEMP, ...)
	 * are not defined in this excerpt -- presumably macros from a shared
	 * header; confirm against the full file.
	 *
	 * Recurring pattern in the solve sections: a multiply (fxpmul / fxsmul)
	 * rescales one register group, then negative-multiply-subtract
	 * instructions (fxcpnmsub / fxcsnmsub) fold that group into the
	 * remaining ones.
	 */

	/* Fold the f0 / f8 group into the remaining registers (A2..A4). */
	fxcpnmsub	f9,  A2, f8, f9
	fxcsnmsub	f5,  A2, f0, f5
	fxcsnmsub	f13, A2, f8, f13

	fxcpnmsub	f2,  A3, f0, f2
	fxcpnmsub	f10, A3, f8, f10
	fxcsnmsub	f6,  A3, f0, f6
	fxcsnmsub	f14, A3, f8, f14

	fxcpnmsub	f3,  A4, f0, f3
	fxcpnmsub	f11, A4, f8, f11
	fxcsnmsub	f7,  A4, f0, f7
	fxcsnmsub	f15, A4, f8, f15

	/* Rescale f4 / f12 with A5, then fold them in (A6..A8). */
	fxsmul	f4,  A5,  f4
	fxsmul	f12, A5,  f12
	fxcpnmsub	f1,  A6, f4,  f1
	fxcpnmsub	f9,  A6, f12, f9
	fxcsnmsub	f5,  A6, f4,  f5
	fxcsnmsub	f13, A6, f12, f13

	fxcpnmsub	f2,  A7, f4,  f2
	fxcpnmsub	f10, A7, f12, f10
	fxcsnmsub	f6,  A7, f4,  f6
	fxcsnmsub	f14, A7, f12, f14

	fxcpnmsub	f3,  A8, f4,  f3
	fxcpnmsub	f11, A8, f12, f11
	fxcsnmsub	f7,  A8, f4,  f7
	fxcsnmsub	f15, A8, f12, f15

	/* Fetch the next eight coefficient pairs; the bare add instructions */
	/* step a pointer forward without loading anything.                  */
	add	AO,  AO,  INC4
	LFPDUX	A1,  AO2, INC4
	LFPDUX	A2,  AO,  INC4
	LFPDUX	A3,  AO2, INC4
	add	AO,  AO,  INC4
	LFPDUX	A4,  AO2, INC4
	LFPDUX	A5,  AO,  INC4
	LFPDUX	A6,  AO2, INC4
	add	AO,  AO,  INC4
	add	AO2, AO2, INC4
	LFPDUX	A7,  AO,  INC4
	LFPDUX	A8,  AO2, INC4

	/* Rescale f1 / f9 with A1, then fold them in (A1..A3). */
	fxpmul	f1,  A1,  f1
	fxpmul	f9,  A1,  f9
	fxcsnmsub	f5,  A1, f1, f5
	fxcsnmsub	f13, A1, f9, f13

	fxcpnmsub	f2,  A2, f1, f2
	fxcpnmsub	f10, A2, f9, f10
	fxcsnmsub	f6,  A2, f1, f6
	fxcsnmsub	f14, A2, f9, f14

	fxcpnmsub	f3,  A3, f1, f3
	fxcpnmsub	f11, A3, f9, f11
	fxcsnmsub	f7,  A3, f1, f7
	fxcsnmsub	f15, A3, f9, f15

	/* Rescale f5 / f13 with A4, then fold them in (A5, A6). */
	fxsmul	f5,  A4,  f5
	fxsmul	f13, A4,  f13
	fxcpnmsub	f2,  A5, f5,  f2
	fxcpnmsub	f10, A5, f13, f10
	fxcsnmsub	f6,  A5, f5,  f6
	fxcsnmsub	f14, A5, f13, f14

	fxcpnmsub	f3,  A6, f5,  f3
	fxcpnmsub	f11, A6, f13, f11
	fxcsnmsub	f7,  A6, f5,  f7
	fxcsnmsub	f15, A6, f13, f15

	/* Rescale f2 / f10 with A7, then fold them in (A7, A8). */
	fxpmul	f2,  A7,  f2
	fxpmul	f10, A7,  f10
	fxcsnmsub	f6,  A7, f2,  f6
	fxcsnmsub	f14, A7, f10, f14

	fxcpnmsub	f3,  A8, f2,  f3
	fxcpnmsub	f11, A8, f10, f11
	fxcsnmsub	f7,  A8, f2,  f7
	fxcsnmsub	f15, A8, f10, f15

	/* Fetch the last four coefficient pairs, then rewind AO / AO2 by */
	/* 64 * SIZE.                                                      */
	add	AO,  AO,  INC4
	add	AO2, AO2, INC4
	LFPDUX	A1,  AO,  INC4
	LFPDUX	A2,  AO2, INC4
	addi	AO,  AO,  8 * SIZE
	addi	AO2, AO2, 4 * SIZE
	LFPDUX	A3,  AO2, INC4
	addi	AO,  AO,  8 * SIZE
	addi	AO2, AO2, 4 * SIZE
	LFPDUX	A4,  AO2, INC4

	subi	AO,  AO,  64 * SIZE
	subi	AO2, AO2, 64 * SIZE

	/* Final three steps: f6 / f14, f3 / f11, f7 / f15. */
	fxsmul	f6,  A1,  f6
	fxsmul	f14, A1,  f14
	fxcpnmsub	f3,  A2, f6,  f3
	fxcpnmsub	f11, A2, f14, f11
	fxcsnmsub	f7,  A2, f6,  f7
	fxcsnmsub	f15, A2, f14, f15

	fxpmul	f3,  A3,  f3
	fxpmul	f11, A3,  f11
	fxcsnmsub	f7,  A3, f3,  f7
	fxcsnmsub	f15, A3, f11, f15

	fxsmul	f7,  A4,  f7
	fxsmul	f15, A4,  f15
#endif

#ifdef RN
	/* RN: load six coefficient pairs from BO / BO2, rewind both pointers */
	/* by 16 * SIZE, then sweep the register groups in ascending order:  */
	/* f0-f3, f4-f7, f8-f11, f12-f15.                                     */
	LFPDUX	A1,  BO,  INC4
	LFPDUX	A2,  BO2, INC4
	LFPDUX	A3,  BO,  INC4
	LFPDUX	A4,  BO2, INC4

	add	BO,  BO,  INC4
	LFPDUX	A5,  BO2, INC4

	add	BO,  BO,  INC4
	LFPDUX	A6,  BO2, INC4

	subi	BO,  BO,  16 * SIZE
	subi	BO2, BO2, 16 * SIZE

	fxpmul	f0,  A1,  f0
	fxpmul	f1,  A1,  f1
	fxpmul	f2,  A1,  f2
	fxpmul	f3,  A1,  f3

	fxcsnmsub	f4,  A1, f0, f4
	fxcsnmsub	f5,  A1, f1, f5
	fxcsnmsub	f6,  A1, f2, f6
	fxcsnmsub	f7,  A1, f3, f7

	fxcpnmsub	f8,  A2, f0, f8
	fxcpnmsub	f9,  A2, f1, f9
	fxcpnmsub	f10, A2, f2, f10
	fxcpnmsub	f11, A2, f3, f11

	fxcsnmsub	f12, A2, f0, f12
	fxcsnmsub	f13, A2, f1, f13
	fxcsnmsub	f14, A2, f2, f14
	fxcsnmsub	f15, A2, f3, f15

	fxsmul	f4,  A3,  f4
	fxsmul	f5,  A3,  f5
	fxsmul	f6,  A3,  f6
	fxsmul	f7,  A3,  f7

	fxcpnmsub	f8,  A4, f4, f8
	fxcpnmsub	f9,  A4, f5, f9
	fxcpnmsub	f10, A4, f6, f10
	fxcpnmsub	f11, A4, f7, f11

	fxcsnmsub	f12, A4, f4, f12
	fxcsnmsub	f13, A4, f5, f13
	fxcsnmsub	f14, A4, f6, f14
	fxcsnmsub	f15, A4, f7, f15

	fxpmul	f8,  A5,  f8
	fxpmul	f9,  A5,  f9
	fxpmul	f10, A5,  f10
	fxpmul	f11, A5,  f11

	fxcsnmsub	f12, A5, f8,  f12
	fxcsnmsub	f13, A5, f9,  f13
	fxcsnmsub	f14, A5, f10, f14
	fxcsnmsub	f15, A5, f11, f15

	fxsmul	f12, A6,  f12
	fxsmul	f13, A6,  f13
	fxsmul	f14, A6,  f14
	fxsmul	f15, A6,  f15
#endif

#ifdef RT
	/* RT: move BO / BO2 forward by 20 * SIZE, load six coefficient pairs */
	/* stepping by INCM4 (presumably a negative stride -- confirm), then  */
	/* sweep the register groups in descending order: f12-f15, f8-f11,    */
	/* f4-f7, f0-f3.                                                      */
	addi	BO,  BO,  20 * SIZE
	addi	BO2, BO2, 20 * SIZE

	LFPDUX	A1,  BO2, INCM4
	LFPDUX	A2,  BO,  INCM4

	LFPDUX	A3,  BO2, INCM4
	LFPDUX	A4,  BO,  INCM4

	add	BO2, BO2, INCM4
	LFPDUX	A5,  BO,  INCM4

	add	BO2, BO2, INCM4
	LFPDUX	A6,  BO,  INCM4
	subi	BO,  BO,  4 * SIZE
	subi	BO2, BO2, 4 * SIZE

	fxsmul	f12, A1,  f12
	fxsmul	f13, A1,  f13
	fxsmul	f14, A1,  f14
	fxsmul	f15, A1,  f15

	fxcpnmsub	f8,  A1, f12, f8
	fxcpnmsub	f9,  A1, f13, f9
	fxcpnmsub	f10, A1, f14, f10
	fxcpnmsub	f11, A1, f15, f11

	fxcsnmsub	f4,  A2, f12, f4
	fxcsnmsub	f5,  A2, f13, f5
	fxcsnmsub	f6,  A2, f14, f6
	fxcsnmsub	f7,  A2, f15, f7

	fxcpnmsub	f0,  A2, f12, f0
	fxcpnmsub	f1,  A2, f13, f1
	fxcpnmsub	f2,  A2, f14, f2
	fxcpnmsub	f3,  A2, f15, f3

	fxpmul	f8,  A3,  f8
	fxpmul	f9,  A3,  f9
	fxpmul	f10, A3,  f10
	fxpmul	f11, A3,  f11

	fxcsnmsub	f4,  A4, f8,  f4
	fxcsnmsub	f5,  A4, f9,  f5
	fxcsnmsub	f6,  A4, f10, f6
	fxcsnmsub	f7,  A4, f11, f7

	fxcpnmsub	f0,  A4, f8,  f0
	fxcpnmsub	f1,  A4, f9,  f1
	fxcpnmsub	f2,  A4, f10, f2
	fxcpnmsub	f3,  A4, f11, f3

	fxsmul	f4,  A5,  f4
	fxsmul	f5,  A5,  f5
	fxsmul	f6,  A5,  f6
	fxsmul	f7,  A5,  f7

	fxcpnmsub	f0,  A5, f4,  f0
	fxcpnmsub	f1,  A5, f5,  f1
	fxcpnmsub	f2,  A5, f6,  f2
	fxcpnmsub	f3,  A5, f7,  f3

	fxpmul	f0,  A6,  f0
	fxpmul	f1,  A6,  f1
	fxpmul	f2,  A6,  f2
	fxpmul	f3,  A6,  f3
#endif

#ifdef LN
	/* LN: step the four C pointers back by 8 * SIZE before storing. */
	subi	CO1, CO1, 8 * SIZE
	subi	CO2, CO2, 8 * SIZE
	subi	CO3, CO3, 8 * SIZE
	subi	CO4, CO4, 8 * SIZE
#endif

#if defined(LN) || defined(LT)
	/* LN / LT: write all 16 register pairs back through BO / BO2, */
	/* rewind those pointers, then store to C.  CO1 / CO3 receive   */
	/* STFDUX stores and CO2 / CO4 receive STFSDUX stores of the    */
	/* same registers.                                              */
	STFPDUX	f0,  BO,  INC4
	STFPDUX	f8,  BO2, INC4
	STFPDUX	f4,  BO,  INC4
	STFPDUX	f12, BO2, INC4
	STFPDUX	f1,  BO,  INC4
	STFPDUX	f9,  BO2, INC4
	STFPDUX	f5,  BO,  INC4
	STFPDUX	f13, BO2, INC4
	STFPDUX	f2,  BO,  INC4
	STFPDUX	f10, BO2, INC4
	STFPDUX	f6,  BO,  INC4
	STFPDUX	f14, BO2, INC4
	STFPDUX	f3,  BO,  INC4
	STFPDUX	f11, BO2, INC4
	STFPDUX	f7,  BO,  INC4
	STFPDUX	f15, BO2, INC4

	subi	BO,  BO,  32 * SIZE
	subi	BO2, BO2, 32 * SIZE

	STFDUX	f0,  CO1, INC
	STFDUX	f4,  CO1, INC
	STFDUX	f1,  CO1, INC
	STFDUX	f5,  CO1, INC
	STFDUX	f2,  CO1, INC
	STFDUX	f6,  CO1, INC
	STFDUX	f3,  CO1, INC
	STFDUX	f7,  CO1, INC

	STFSDUX	f0,  CO2, INC
	STFSDUX	f4,  CO2, INC
	STFSDUX	f1,  CO2, INC
	STFSDUX	f5,  CO2, INC
	STFSDUX	f2,  CO2, INC
	STFSDUX	f6,  CO2, INC
	STFSDUX	f3,  CO2, INC
	STFSDUX	f7,  CO2, INC

	STFDUX	f8,  CO3, INC
	STFDUX	f12, CO3, INC
	STFDUX	f9,  CO3, INC
	STFDUX	f13, CO3, INC
	STFDUX	f10, CO3, INC
	STFDUX	f14, CO3, INC
	STFDUX	f11, CO3, INC
	STFDUX	f15, CO3, INC

	STFSDUX	f8,  CO4, INC
	STFSDUX	f12, CO4, INC
	STFSDUX	f9,  CO4, INC
	STFSDUX	f13, CO4, INC
	STFSDUX	f10, CO4, INC
	STFSDUX	f14, CO4, INC
	STFSDUX	f11, CO4, INC
	STFSDUX	f15, CO4, INC
#else
	/* RN / RT: write all 16 register pairs back through AO / AO2, */
	/* rewind those pointers, then store to C.  Each register is    */
	/* stored twice in a row (STFDUX then STFSDUX) to the same C    */
	/* pointer; f0-f3 go to CO1, f4-f7 to CO2, f8-f11 to CO3 and    */
	/* f12-f15 to CO4.                                              */
	STFPDUX	f0,  AO,  INC4
	STFPDUX	f1,  AO2, INC4
	STFPDUX	f2,  AO,  INC4
	STFPDUX	f3,  AO2, INC4
	STFPDUX	f4,  AO,  INC4
	STFPDUX	f5,  AO2, INC4
	STFPDUX	f6,  AO,  INC4
	STFPDUX	f7,  AO2, INC4
	STFPDUX	f8,  AO,  INC4
	STFPDUX	f9,  AO2, INC4
	STFPDUX	f10, AO,  INC4
	STFPDUX	f11, AO2, INC4
	STFPDUX	f12, AO,  INC4
	STFPDUX	f13, AO2, INC4
	STFPDUX	f14, AO,  INC4
	STFPDUX	f15, AO2, INC4

	subi	AO,  AO,  32 * SIZE
	subi	AO2, AO2, 32 * SIZE

	STFDUX	f0,  CO1, INC
	STFSDUX	f0,  CO1, INC
	STFDUX	f1,  CO1, INC
	STFSDUX	f1,  CO1, INC
	STFDUX	f2,  CO1, INC
	STFSDUX	f2,  CO1, INC
	STFDUX	f3,  CO1, INC
	STFSDUX	f3,  CO1, INC

	STFDUX	f4,  CO2, INC
	STFSDUX	f4,  CO2, INC
	STFDUX	f5,  CO2, INC
	STFSDUX	f5,  CO2, INC
	STFDUX	f6,  CO2, INC
	STFSDUX	f6,  CO2, INC
	STFDUX	f7,  CO2, INC
	STFSDUX	f7,  CO2, INC

	STFDUX	f8,  CO3, INC
	STFSDUX	f8,  CO3, INC
	STFDUX	f9,  CO3, INC
	STFSDUX	f9,  CO3, INC
	STFDUX	f10, CO3, INC
	STFSDUX	f10, CO3, INC
	STFDUX	f11, CO3, INC
	STFSDUX	f11, CO3, INC

	STFDUX	f12, CO4, INC
	STFSDUX	f12, CO4, INC
	STFDUX	f13, CO4, INC
	STFSDUX	f13, CO4, INC
	STFDUX	f14, CO4, INC
	STFSDUX	f14, CO4, INC
	STFDUX	f15, CO4, INC
	STFSDUX	f15, CO4, INC
#endif

#ifdef LN
	/* LN: undo the pointer advance made by the stores above. */
	subi	CO1, CO1, 8 * SIZE
	subi	CO2, CO2, 8 * SIZE
	subi	CO3, CO3, 8 * SIZE
	subi	CO4, CO4, 8 * SIZE
#endif

#ifdef RT
	/* RT: AORIG += K << (3 + BASE_SHIFT). */
	slwi	r0, K, 3 + BASE_SHIFT
	add	AORIG, AORIG, r0
#endif

#if defined(LT) || defined(RN)
	/* LT / RN: skip the remaining (K - KK) steps in both operands: */
	/* AO += (K - KK) << (3 + BASE_SHIFT),                          */
	/* BO += (K - KK) << (2 + BASE_SHIFT).                          */
	sub	TEMP, K, KK
	slwi	r0,   TEMP, 3 + BASE_SHIFT
	slwi	TEMP, TEMP, 2 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LT
	addi	KK, KK, 8
#endif

#ifdef LN
	subi	KK, KK, 8
#endif

	/* Decrement I; reload f0 from offset FZERO off SP (presumably a */
	/* stored zero -- confirm); loop to .L11 while I > 0.             */
	addic.	I, I, -1
	li	r0, FZERO

	lfpsx	f0, SP, r0
	bgt+	.L11
	.align 4

.L49:
	/* Per-J bookkeeping for the loop that branches back to .L10. */
#ifdef LN
	slwi	r0, K, 2 + BASE_SHIFT
	add	B, B, r0
#endif

#if defined(LT) || defined(RN)
	addi	B,  BO, 4 * SIZE
#endif

#ifdef RN
	addi	KK, KK, 4
#endif

#ifdef RT
	subi	KK, KK, 4
#endif

	addic.	J, J, -1
	bgt+	.L10
	.align 4

.L50:
	/* Section taken when bit 1 of N is set; otherwise skip to .L90. */
	andi.	J, N,  2
	beq	.L90

#ifdef RT
	/* RT: B -= K << (1 + BASE_SHIFT); C -= LDC << 1. */
	slwi	r0, K, 1 + BASE_SHIFT
	sub	B, B, r0

	slwi	r0, LDC, 1
	sub	C, C, r0
#endif

	mr	CO1, C
	add	CO2, C,   LDC

#ifdef LN
	add	KK, M, OFFSET
#endif

#ifdef LT
	mr	KK, OFFSET
#endif

#if defined(LN) || defined(RT)
	addi	AORIG, A, -2 * SIZE
#else
	addi	AO, A, -2 * SIZE
#endif
#ifndef RT
	add	C,  CO2, LDC
#endif

	li	r0, FZERO
	lfpsx	f0, SP, r0

	/* Sub-section taken when bit 0 of M is set; otherwise skip to .L60. */
	andi.	I, M,  1
	beq	.L60

#if defined(LT) || defined(RN)
	/* Copy f0 into f1..f3; CTR = KK >> 3 (count for the .L82 loop). */
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3,  f0

	srawi.	r0,  KK,  3
	mtspr	CTR, r0
	ble	.L84
#else
#ifdef LN
	slwi	r0,   K,  0 + BASE_SHIFT
	sub	AORIG, AORIG, r0
#endif

	/* AO = AORIG + (KK << BASE_SHIFT); BO = B + (KK << (1 + BASE_SHIFT)); */
	/* TEMP = K - KK; CTR = TEMP >> 3.                                     */
	slwi	r0  , KK, 0 + BASE_SHIFT
	slwi	TEMP, KK, 1 + BASE_SHIFT
	add	AO, AORIG, r0
	add	BO, B,     TEMP

	sub	TEMP, K, KK

	addi	BO,  BO,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3,  f0

	srawi.	r0,  TEMP,  3
	mtspr	CTR, r0
	ble	.L84
#endif

	/* Preload operands for the first pass of the .L82 loop. */
	LFPDUX	B1,  BO,  INC2
	LFPDUX	A1,  AO,  INC2
	LFPDUX	A2,  AO,  INC2
	LFPDUX	B2,  BO,  INC2
	LFPDUX	A3,  AO,  INC2
	LFPDUX	A4,  AO,  INC2

	LFPDUX	B3,  BO,  INC2
	LFPDUX	B4,  BO,  INC2
	bdz-	.L83
	.align 4

.L82:
	/* Main accumulation loop: eight multiply-adds per pass into */
	/* f0..f3, with loads for the next pass interleaved.         */
	fxcpmadd	f0,  A1, B1, f0
	LFPDUX	B1,  BO,  INC2
	fxcsmadd	f1,  A1, B2, f1
	LFPDUX	B2,  BO,  INC2
	LFPDUX	A1,  AO,  INC2
	fxcpmadd	f2,  A2, B3, f2
	LFPDUX	B3,  BO,  INC2
	fxcsmadd	f3,  A2, B4, f3
	LFPDUX	B4,  BO,  INC2
	LFPDUX	A2,  AO,  INC2

	fxcpmadd	f0,  A3, B1, f0
	LFPDUX	B1,  BO,  INC2
	fxcsmadd	f1,  A3, B2, f1
	LFPDUX	B2,  BO,  INC2
	LFPDUX	A3,  AO,  INC2
	fxcpmadd	f2,  A4, B3, f2
	LFPDUX	B3,  BO,  INC2
	fxcsmadd	f3,  A4, B4, f3
	LFPDUX	B4,  BO,  INC2
	LFPDUX	A4,  AO,  INC2
	bdnz+	.L82
	.align 4

.L83:
	/* Final pass of the loop above: same multiply-adds, but only */
	/* the B operands still needed are loaded.                    */
	fxcpmadd	f0,  A1, B1, f0
	LFPDUX	B1,  BO,  INC2
	fxcsmadd	f1,  A1, B2, f1
	LFPDUX	B2,  BO,  INC2
	fxcpmadd	f2,  A2, B3, f2
	LFPDUX	B3,  BO,  INC2
	fxcsmadd	f3,  A2, B4, f3
	LFPDUX	B4,  BO,  INC2

	fxcpmadd	f0,  A3, B1, f0
	fxcsmadd	f1,  A3, B2, f1
	fxcpmadd	f2,  A4, B3, f2
	fxcsmadd	f3,  A4, B4, f3
	.align 4

.L84:
	/* Remainder: CTR = count & 7 (count is KK or TEMP, matching the */
	/* setup above); go to .L88 when nothing is left.                 */
#if defined(LT) || defined(RN)
	andi.	r0,  KK,  7
	mtspr	CTR, r0
	ble+	.L88
#else
	andi.	r0, TEMP, 7
	mtspr	CTR, r0
	ble+	.L88
#endif

	LFDX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	add	AO, AO, INC
	bdz-	.L87
	.align 4

.L86:
	/* One multiply-add per pass; AO is advanced by a separate add */
	/* after the LFDX load.                                        */
	fxcpmadd	f0,  A1, B1, f0
	LFDX	A1,  AO,  INC2
	LFPDUX	B1,  BO,  INC2
	add	AO, AO, INC
	bdnz+	.L86
	.align 4

.L87:
	fxcpmadd	f0,  A1, B1, f0
	.align 4

.L88:
	/* Reduce the four accumulators: f0 = (f0 + f1) + (f2 + f3). */
	fpadd	f0, f0, f1
	fpadd	f2, f2, f3
	fpadd	f0, f0, f2

#if defined(LN) || defined(RT)
	/* Reposition AO / BO from AORIG / B using KK - 1 (LN) or KK - 2 (RT). */
#ifdef LN
	subi	r0, KK, 1
#else
	subi	r0, KK, 2
#endif
	slwi	TEMP, r0, 0 + BASE_SHIFT
	slwi	r0,   r0, 1 + BASE_SHIFT
	add	AO, AORIG, TEMP
	add	BO,  B,     r0
	addi	BO,  BO, - 2 * SIZE
#endif

	/* f0 = (stored pair) - (accumulated f0); the pair comes from BO */
	/* for LN / LT and from AO otherwise.                             */
#if defined(LN) || defined(LT)
	LFPDX	f16, BO,  INC2

	fpsub	f0,  f16,  f0
#else
	LFPDX	f16, AO,  INC2

	fpsub	f0,  f16,  f0
#endif

#ifdef LN
	LFPDX	A1,  AO,  INC2

	fxpmul	f0,  A1, f0
#endif

#ifdef LT
	LFPDX	A1,  AO,  INC2

	fxpmul	f0,  A1, f0
#endif

#ifdef RN
	/* RN: split f0 into scalars via f1, update f0 first and then f1 */
	/* with three coefficients read from BO, and re-pack into f0.     */
	LFD	A1,  (2 + 0) * SIZE(BO)
	LFD	A2,  (2 + 1) * SIZE(BO)
	LFD	A3,  (2 + 3) * SIZE(BO)

	fsmtp	f1,  f0

	fmul	f0,  A1, f0
	fnmsub	f1,  A2, f0, f1

	fmul	f1,  A3, f1
	fsmfp	f0,  f1
#endif

#ifdef RT
	/* RT: same split, but f1 is updated first and then f0, using */
	/* the coefficients in the opposite order.                    */
	LFD	A1,  (2 + 3) * SIZE(BO)
	LFD	A2,  (2 + 2) * SIZE(BO)
	LFD	A3,  (2 + 0) * SIZE(BO)

	fsmtp	f1,  f0

	fmul	f1,  A1, f1
	fnmsub	f0,  A2, f1, f0

	fmul	f0,  A3, f0
	fsmfp	f0,  f1
#endif

#ifdef LN
	subi	CO1, CO1, 1 * SIZE
	subi	CO2, CO2, 1 * SIZE
#endif

	/* Write the result back to the packed operand and to C. */
#if defined(LN) || defined(LT)
	STFPDX	f0,  BO,  INC2

	STFDUX	f0,  CO1, INC
	STFSDUX	f0,  CO2, INC
#else
	STFPDX	f0,  AO,  INC2

	STFDUX	f0,  CO1, INC
	STFDUX	f1,  CO2, INC
#endif

#ifdef LN
	subi	CO1, CO1, 1 * SIZE
	subi	CO2, CO2, 1 * SIZE
#endif

#ifdef RT
	slwi	r0, K, 0 + BASE_SHIFT
	add	AORIG, AORIG, r0
#endif

#if defined(LT) || defined(RN)
	/* AO += (K - KK) << BASE_SHIFT; BO += (K - KK) << (1 + BASE_SHIFT). */
	sub	TEMP, K, KK
	slwi	r0,   TEMP, 0 + BASE_SHIFT
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LT
	addi	KK, KK, 1
#endif

#ifdef LN
	subi	KK, KK, 1
#endif

	li	r0, FZERO
	lfpsx	f0, SP, r0
	.align 4

.L60:
	/* Sub-section taken when bit 1 of M is set; otherwise skip to .L70. */
	andi.	I, M,  2
	beq	.L70

#if defined(LT) || defined(RN)
	addi	BO,  B,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3,  f0

	srawi.	r0,  KK,  3
	mtspr	CTR, r0
	ble	.L74
#else
#ifdef LN
	slwi	r0,   K,  1 + BASE_SHIFT
	sub	AORIG, AORIG, r0
#endif

	/* Both operands are offset by KK << (1 + BASE_SHIFT) here. */
	slwi	r0  , KK, 1 + BASE_SHIFT
	slwi	TEMP, KK, 1 + BASE_SHIFT
	add	AO, AORIG, r0
	add	BO, B,     TEMP

	sub	TEMP, K, KK

	addi	BO,  BO,  - 2 * SIZE
	fpmr	f1,  f0
	fpmr	f2,  f0
	fpmr	f3,  f0

	srawi.	r0,  TEMP,  3
	mtspr	CTR, r0
	ble	.L74
#endif

	/* Preload operands for the loop that follows (the excerpt ends */
	/* before the loop itself).                                      */
	LFPDUX	A1,  AO, INC2
	LFPDUX	B1,  BO, INC2
	LFPDUX	A2,  AO, INC2
	LFPDUX	B2,  BO, INC2
	LFPDUX	A3,  AO, INC2
	LFPDUX	B3,  BO, INC2
	LFPDUX	A4,  AO, INC2
	LFPDUX	B4,  BO, INC2
	LFPDUX	A5,  AO, INC2

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -