atl_dmm4x4xur3_mips.c

来自「基于 BLAS、CLAPACK 的代码,用过的人知道是干啥的」· C语言 代码 · 共 2,389 行 · 第 1/5 页

C
2,389
字号
	madd.d	rC22, rC22, ra2, rB2	madd.d	rC32, rC32, ra3, rB2					ldc1	rB2, 472(pB2)	madd.d	rC03, rC03, ra0, rB3	madd.d	rC13, rC13, ra1, rB3					ldc1	rA3, 480(pA3)	madd.d	rC23, rC23, ra2, rB3	madd.d	rC33, rC33, ra3, rB3					ldc1	rB3, 472(pB3)	madd.d	rC00, rC00, rz0, rB0	madd.d	rC10, rC10, rz1, rB0					ldc1	ra0, 488(pA0)	madd.d	rC20, rC20, rz2, rB0	madd.d	rC30, rC30, rz3, rB0					ldc1	rB0, 480(pB0)	madd.d	rC01, rC01, rz0, rB1	madd.d	rC11, rC11, rz1, rB1					ldc1	ra1, 488(pA1)	madd.d	rC21, rC21, rz2, rB1	madd.d	rC31, rC31, rz3, rB1					ldc1	rB1, 480(pB1)	madd.d	rC02, rC02, rz0, rB2	madd.d	rC12, rC12, rz1, rB2					ldc1	ra2, 488(pA2)	madd.d	rC22, rC22, rz2, rB2	madd.d	rC32, rC32, rz3, rB2					ldc1	rB2, 480(pB2)	madd.d	rC03, rC03, rz0, rB3	madd.d	rC13, rC13, rz1, rB3					ldc1	ra3, 488(pA3)	madd.d	rC23, rC23, rz2, rB3	madd.d	rC33, rC33, rz3, rB3					ldc1	rB3, 480(pB3)#endif#if KB > 63	madd.d	rC00, rC00, rA0, rB0	madd.d	rC10, rC10, rA1, rB0					ldc1	rz0, 496(pA0)	madd.d	rC20, rC20, rA2, rB0	madd.d	rC30, rC30, rA3, rB0					ldc1	rB0, 488(pB0)	madd.d	rC01, rC01, rA0, rB1	madd.d	rC11, rC11, rA1, rB1					ldc1	rz1, 496(pA1)	madd.d	rC21, rC21, rA2, rB1	madd.d	rC31, rC31, rA3, rB1					ldc1	rB1, 488(pB1)	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2					ldc1	rz2, 496(pA2)	madd.d	rC22, rC22, rA2, rB2	madd.d	rC32, rC32, rA3, rB2					ldc1	rB2, 488(pB2)	madd.d	rC03, rC03, rA0, rB3	madd.d	rC13, rC13, rA1, rB3					ldc1	rz3, 496(pA3)	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB3, 488(pB3)	madd.d	rC00, rC00, ra0, rB0	madd.d	rC10, rC10, ra1, rB0					ldc1	rA0, 504(pA0)	madd.d	rC20, rC20, ra2, rB0	madd.d	rC30, rC30, ra3, rB0					ldc1	rB0, 496(pB0)	madd.d	rC01, rC01, ra0, rB1	madd.d	rC11, rC11, ra1, rB1					ldc1	rA1, 504(pA1)	madd.d	rC21, rC21, ra2, rB1	madd.d	rC31, rC31, ra3, rB1					ldc1	rB1, 496(pB1)	madd.d	rC02, rC02, ra0, rB2	madd.d	rC12, rC12, ra1, rB2					ldc1	rA2, 504(pA2)	madd.d	rC22, rC22, ra2, rB2	madd.d	rC32, rC32, ra3, rB2					ldc1	rB2, 496(pB2)	madd.d	
rC03, rC03, ra0, rB3	madd.d	rC13, rC13, ra1, rB3					ldc1	rA3, 504(pA3)	madd.d	rC23, rC23, ra2, rB3	madd.d	rC33, rC33, ra3, rB3					ldc1	rB3, 496(pB3)	madd.d	rC00, rC00, rz0, rB0	madd.d	rC10, rC10, rz1, rB0					ldc1	ra0, 512(pA0)	madd.d	rC20, rC20, rz2, rB0	madd.d	rC30, rC30, rz3, rB0					ldc1	rB0, 504(pB0)	madd.d	rC01, rC01, rz0, rB1	madd.d	rC11, rC11, rz1, rB1					ldc1	ra1, 512(pA1)	madd.d	rC21, rC21, rz2, rB1	madd.d	rC31, rC31, rz3, rB1					ldc1	rB1, 504(pB1)	madd.d	rC02, rC02, rz0, rB2	madd.d	rC12, rC12, rz1, rB2					ldc1	ra2, 512(pA2)	madd.d	rC22, rC22, rz2, rB2	madd.d	rC32, rC32, rz3, rB2					ldc1	rB2, 504(pB2)	madd.d	rC03, rC03, rz0, rB3	madd.d	rC13, rC13, rz1, rB3					ldc1	ra3, 512(pA3)	madd.d	rC23, rC23, rz2, rB3	madd.d	rC33, rC33, rz3, rB3					ldc1	rB3, 504(pB3)#endif#if KB > 66	madd.d	rC00, rC00, rA0, rB0	madd.d	rC10, rC10, rA1, rB0					ldc1	rz0, 520(pA0)	madd.d	rC20, rC20, rA2, rB0	madd.d	rC30, rC30, rA3, rB0					ldc1	rB0, 512(pB0)	madd.d	rC01, rC01, rA0, rB1	madd.d	rC11, rC11, rA1, rB1					ldc1	rz1, 520(pA1)	madd.d	rC21, rC21, rA2, rB1	madd.d	rC31, rC31, rA3, rB1					ldc1	rB1, 512(pB1)	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2					ldc1	rz2, 520(pA2)	madd.d	rC22, rC22, rA2, rB2	madd.d	rC32, rC32, rA3, rB2					ldc1	rB2, 512(pB2)	madd.d	rC03, rC03, rA0, rB3	madd.d	rC13, rC13, rA1, rB3					ldc1	rz3, 520(pA3)	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB3, 512(pB3)	madd.d	rC00, rC00, ra0, rB0	madd.d	rC10, rC10, ra1, rB0					ldc1	rA0, 528(pA0)	madd.d	rC20, rC20, ra2, rB0	madd.d	rC30, rC30, ra3, rB0					ldc1	rB0, 520(pB0)	madd.d	rC01, rC01, ra0, rB1	madd.d	rC11, rC11, ra1, rB1					ldc1	rA1, 528(pA1)	madd.d	rC21, rC21, ra2, rB1	madd.d	rC31, rC31, ra3, rB1					ldc1	rB1, 520(pB1)	madd.d	rC02, rC02, ra0, rB2	madd.d	rC12, rC12, ra1, rB2					ldc1	rA2, 528(pA2)	madd.d	rC22, rC22, ra2, rB2	madd.d	rC32, rC32, ra3, rB2					ldc1	rB2, 520(pB2)	madd.d	rC03, rC03, ra0, rB3	madd.d	rC13, rC13, ra1, rB3					ldc1	rA3, 528(pA3)	madd.d	rC23, 
rC23, ra2, rB3	madd.d	rC33, rC33, ra3, rB3					ldc1	rB3, 520(pB3)	madd.d	rC00, rC00, rz0, rB0	madd.d	rC10, rC10, rz1, rB0					ldc1	ra0, 536(pA0)	madd.d	rC20, rC20, rz2, rB0	madd.d	rC30, rC30, rz3, rB0					ldc1	rB0, 528(pB0)	madd.d	rC01, rC01, rz0, rB1	madd.d	rC11, rC11, rz1, rB1					ldc1	ra1, 536(pA1)	madd.d	rC21, rC21, rz2, rB1	madd.d	rC31, rC31, rz3, rB1					ldc1	rB1, 528(pB1)	madd.d	rC02, rC02, rz0, rB2	madd.d	rC12, rC12, rz1, rB2					ldc1	ra2, 536(pA2)	madd.d	rC22, rC22, rz2, rB2	madd.d	rC32, rC32, rz3, rB2					ldc1	rB2, 528(pB2)	madd.d	rC03, rC03, rz0, rB3	madd.d	rC13, rC13, rz1, rB3					ldc1	ra3, 536(pA3)	madd.d	rC23, rC23, rz2, rB3	madd.d	rC33, rC33, rz3, rB3					ldc1	rB3, 528(pB3)#endif#if KB > 69	madd.d	rC00, rC00, rA0, rB0	madd.d	rC10, rC10, rA1, rB0					ldc1	rz0, 544(pA0)	madd.d	rC20, rC20, rA2, rB0	madd.d	rC30, rC30, rA3, rB0					ldc1	rB0, 536(pB0)	madd.d	rC01, rC01, rA0, rB1	madd.d	rC11, rC11, rA1, rB1					ldc1	rz1, 544(pA1)	madd.d	rC21, rC21, rA2, rB1	madd.d	rC31, rC31, rA3, rB1					ldc1	rB1, 536(pB1)	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2					ldc1	rz2, 544(pA2)	madd.d	rC22, rC22, rA2, rB2	madd.d	rC32, rC32, rA3, rB2					ldc1	rB2, 536(pB2)	madd.d	rC03, rC03, rA0, rB3	madd.d	rC13, rC13, rA1, rB3					ldc1	rz3, 544(pA3)	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB3, 536(pB3)	madd.d	rC00, rC00, ra0, rB0	madd.d	rC10, rC10, ra1, rB0					ldc1	rA0, 552(pA0)	madd.d	rC20, rC20, ra2, rB0	madd.d	rC30, rC30, ra3, rB0					ldc1	rB0, 544(pB0)	madd.d	rC01, rC01, ra0, rB1	madd.d	rC11, rC11, ra1, rB1					ldc1	rA1, 552(pA1)	madd.d	rC21, rC21, ra2, rB1	madd.d	rC31, rC31, ra3, rB1					ldc1	rB1, 544(pB1)	madd.d	rC02, rC02, ra0, rB2	madd.d	rC12, rC12, ra1, rB2					ldc1	rA2, 552(pA2)	madd.d	rC22, rC22, ra2, rB2	madd.d	rC32, rC32, ra3, rB2					ldc1	rB2, 544(pB2)	madd.d	rC03, rC03, ra0, rB3	madd.d	rC13, rC13, ra1, rB3					ldc1	rA3, 552(pA3)	madd.d	rC23, rC23, ra2, rB3	madd.d	rC33, rC33, ra3, rB3					ldc1	rB3, 544(pB3)	madd.d	rC00, rC00, 
rz0, rB0	madd.d	rC10, rC10, rz1, rB0					ldc1	ra0, 560(pA0)	madd.d	rC20, rC20, rz2, rB0	madd.d	rC30, rC30, rz3, rB0					ldc1	rB0, 552(pB0)	madd.d	rC01, rC01, rz0, rB1	madd.d	rC11, rC11, rz1, rB1					ldc1	ra1, 560(pA1)	madd.d	rC21, rC21, rz2, rB1	madd.d	rC31, rC31, rz3, rB1					ldc1	rB1, 552(pB1)	madd.d	rC02, rC02, rz0, rB2	madd.d	rC12, rC12, rz1, rB2					ldc1	ra2, 560(pA2)	madd.d	rC22, rC22, rz2, rB2	madd.d	rC32, rC32, rz3, rB2					ldc1	rB2, 552(pB2)	madd.d	rC03, rC03, rz0, rB3	madd.d	rC13, rC13, rz1, rB3					ldc1	ra3, 560(pA3)	madd.d	rC23, rC23, rz2, rB3	madd.d	rC33, rC33, rz3, rB3					ldc1	rB3, 552(pB3)#endif#if KB > 72	madd.d	rC00, rC00, rA0, rB0	madd.d	rC10, rC10, rA1, rB0					ldc1	rz0, 568(pA0)	madd.d	rC20, rC20, rA2, rB0	madd.d	rC30, rC30, rA3, rB0					ldc1	rB0, 560(pB0)	madd.d	rC01, rC01, rA0, rB1	madd.d	rC11, rC11, rA1, rB1					ldc1	rz1, 568(pA1)	madd.d	rC21, rC21, rA2, rB1	madd.d	rC31, rC31, rA3, rB1					ldc1	rB1, 560(pB1)	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2					ldc1	rz2, 568(pA2)	madd.d	rC22, rC22, rA2, rB2	madd.d	rC32, rC32, rA3, rB2					ldc1	rB2, 560(pB2)	madd.d	rC03, rC03, rA0, rB3	madd.d	rC13, rC13, rA1, rB3					ldc1	rz3, 568(pA3)	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB3, 560(pB3)	madd.d	rC00, rC00, ra0, rB0	madd.d	rC10, rC10, ra1, rB0					ldc1	rA0, 576(pA0)	madd.d	rC20, rC20, ra2, rB0	madd.d	rC30, rC30, ra3, rB0					ldc1	rB0, 568(pB0)	madd.d	rC01, rC01, ra0, rB1	madd.d	rC11, rC11, ra1, rB1					ldc1	rA1, 576(pA1)	madd.d	rC21, rC21, ra2, rB1	madd.d	rC31, rC31, ra3, rB1					ldc1	rB1, 568(pB1)	madd.d	rC02, rC02, ra0, rB2	madd.d	rC12, rC12, ra1, rB2					ldc1	rA2, 576(pA2)	madd.d	rC22, rC22, ra2, rB2	madd.d	rC32, rC32, ra3, rB2					ldc1	rB2, 568(pB2)	madd.d	rC03, rC03, ra0, rB3	madd.d	rC13, rC13, ra1, rB3					ldc1	rA3, 576(pA3)	madd.d	rC23, rC23, ra2, rB3	madd.d	rC33, rC33, ra3, rB3					ldc1	rB3, 568(pB3)	madd.d	rC00, rC00, rz0, rB0	madd.d	rC10, rC10, rz1, rB0					ldc1	ra0, 584(pA0)	madd.d	rC20, rC20, rz2, 
rB0	madd.d	rC30, rC30, rz3, rB0					ldc1	rB0, 576(pB0)	madd.d	rC01, rC01, rz0, rB1	madd.d	rC11, rC11, rz1, rB1					ldc1	ra1, 584(pA1)	madd.d	rC21, rC21, rz2, rB1	madd.d	rC31, rC31, rz3, rB1					ldc1	rB1, 576(pB1)	madd.d	rC02, rC02, rz0, rB2	madd.d	rC12, rC12, rz1, rB2					ldc1	ra2, 584(pA2)	madd.d	rC22, rC22, rz2, rB2	madd.d	rC32, rC32, rz3, rB2					ldc1	rB2, 576(pB2)	madd.d	rC03, rC03, rz0, rB3	madd.d	rC13, rC13, rz1, rB3					ldc1	ra3, 584(pA3)	madd.d	rC23, rC23, rz2, rB3	madd.d	rC33, rC33, rz3, rB3					ldc1	rB3, 576(pB3)#endif/*       daddiu  K, K, 8         bne K, K0, KLOOP         madd.d  rC33, rC33, ra3, rb3 /*   /* in delay slot! *//* *      Drain ld/use pipe */	madd.d	rC00, rC00, rA0, rB0					daddiu  pC0, pC0, CMUL(32)	madd.d	rC10, rC10, rA1, rB0					ldc1	rz0, KB*8-8(pA0)	madd.d	rC20, rC20, rA2, rB0					daddu   pA0, pA0, incAm	madd.d	rC30, rC30, rA3, rB0					ldc1	rB0, KB*8-16(pB0)	madd.d	rC01, rC01, rA0, rB1					daddiu  pC1, pC1, CMUL(32)	madd.d	rC11, rC11, rA1, rB1					ldc1	rz1, KB*8-8(pA1)	madd.d	rC21, rC21, rA2, rB1					daddu   pA1, pA1, incAm	madd.d	rC31, rC31, rA3, rB1					ldc1	rB1, KB*8-16(pB1)	madd.d	rC02, rC02, rA0, rB2					daddiu  pC2, pC2, CMUL(32)	madd.d	rC12, rC12, rA1, rB2					ldc1	rz2, KB*8-8(pA2)	madd.d	rC22, rC22, rA2, rB2					daddu   pA2, pA2, incAm	madd.d	rC32, rC32, rA3, rB2					ldc1	rB2, KB*8-16(pB2)	madd.d	rC03, rC03, rA0, rB3					daddiu  pC3, pC3, CMUL(32)	madd.d	rC13, rC13, rA1, rB3					ldc1	rz3, KB*8-8(pA3)	madd.d	rC23, rC23, rA2, rB3					daddu   pA3, pA3, incAm	madd.d	rC33, rC33, rA3, rB3					ldc1	rB3, KB*8-16(pB3)	madd.d	rC00, rC00, ra0, rB0	madd.d	rC10, rC10, ra1, rB0	madd.d	rC20, rC20, ra2, rB0	madd.d	rC30, rC30, ra3, rB0					ldc1	rB0, KB*8-8(pB0)	madd.d	rC01, rC01, ra0, rB1	madd.d	rC11, rC11, ra1, rB1	madd.d	rC21, rC21, ra2, rB1	madd.d	rC31, rC31, ra3, rB1					ldc1	rB1, KB*8-8(pB1)	madd.d	rC02, rC02, ra0, rB2	madd.d	rC12, rC12, ra1, rB2	madd.d	rC22, rC22, ra2, rB2	madd.d	rC32, rC32, ra3, rB2					ldc1	rB2, KB*8-8(pB2)	madd.d	rC03, 
rC03, ra0, rB3	madd.d	rC13, rC13, ra1, rB3	madd.d	rC23, rC23, ra2, rB3	madd.d	rC33, rC33, ra3, rB3					ldc1	rB3, KB*8-8(pB3)	madd.d	rC00, rC00, rz0, rB0	madd.d	rC10, rC10, rz1, rB0	madd.d	rC20, rC20, rz2, rB0	madd.d	rC30, rC30, rz3, rB0	madd.d	rC01, rC01, rz0, rB1        				sdc1    rC00, -CMUL(32)(pC0)	madd.d	rC11, rC11, rz1, rB1        				sdc1    rC10, -CMUL(24)(pC0)	madd.d	rC21, rC21, rz2, rB1        				sdc1    rC20, -CMUL(16)(pC0)	madd.d	rC31, rC31, rz3, rB1        				sdc1    rC30, -CMUL(8)(pC0)	madd.d	rC02, rC02, rz0, rB2        				sdc1    rC01, -CMUL(32)(pC1)	madd.d	rC12, rC12, rz1, rB2        				sdc1    rC11, -CMUL(24)(pC1)	madd.d	rC22, rC22, rz2, rB2        				sdc1    rC21, -CMUL(16)(pC1)	madd.d	rC32, rC32, rz3, rB2        				sdc1    rC31, -CMUL(8)(pC1)	madd.d	rC03, rC03, rz0, rB3        				sdc1    rC02, -CMUL(32)(pC2)	madd.d	rC13, rC13, rz1, rB3        				sdc1    rC12, -CMUL(24)(pC2)	madd.d	rC23, rC23, rz2, rB3        				sdc1    rC22, -CMUL(16)(pC2)	madd.d	rC33, rC33, rz3, rB3        				sdc1    rC32, -CMUL(8)(pC2)        sdc1    rC03, -CMUL(32)(pC3)        sdc1    rC13, -CMUL(24)(pC3)        sdc1    rC23, -CMUL(16)(pC3)        bne pA0, stAm, MLOOP        sdc1    rC33, -CMUL(8)(pC3)        dsubu   pA0, pA0, incAn        dsubu   pA1, pA1, incAn        dsubu   pA2, pA2, incAn        dsubu   pA3, pA3, incAn	daddu	pC0, pC0, incCn	daddu	pC1, pC1, incCn	daddu	pC2, pC2, incCn	daddu	pC3, pC3, incCn        daddu   pB0, pB0, incBn        daddu   pB1, pB1, incBn        daddu   pB2, pB2, incBn        bne pB0, stBn, NLOOP        daddu   pB3, pB3, incBn         /* delay slot! 
*/DONE:/* *      Epilogue: restore registers and return */        ld      $16, 0($sp)        ld      $17, 8($sp)        ld      $18, 16($sp)        ld      $19, 24($sp)        ld      $20, 32($sp)        ld      $21, 40($sp)        ld      $22, 64($sp)        ld      $23, 72($sp)        ld      $30, 80($sp)#ifdef ATL_USE64BITS        ldc1    $f24, 88($sp)        ldc1    $f25, 96($sp)        ldc1    $f26, 104($sp)        ldc1    $f27, 112($sp)        ldc1    $f28, 120($sp)        ldc1    $f29, 128($sp)        ldc1    $f30, 136($sp)        ldc1    $f31, 144($sp)#else        ldc1    $f20, 88($sp)        ldc1    $f22, 96($sp)        ldc1    $f24, 104($sp)        ldc1    $f26, 112($sp)        ldc1    $f28, 120($sp)        ldc1    $f30, 128($sp)#endif        j       $31        daddiu  $sp, $sp, FSIZE         /* delay slot of return statement *//*      end of file MIPS assembler BS */        .set    macro        .set    reorder        .set    at#ifndef ATL_OS_IRIX        .size   ATL_USERMM,.-ATL_USERMM#endif        .end    ATL_USERMM

⌨️ 快捷键说明

复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?