atl_dmm4x4xur2_mips.c

来自「基于 BLAS/CLAPACK 的，用过的人知道是干啥的」· C语言 代码 · 共 2,473 行 · 第 1/5 页

C
2,473
字号
	madd.d	rC22, rC22, rA2, rB2					prefB(224+KB*8*4(pB0))	madd.d	rC32, rC32, rA3, rB2					prefB(224+KB*8*4(pB1))	madd.d	rC03, rC03, rA0, rB3					prefB(224+KB*8*4(pB2))	madd.d	rC13, rC13, rA1, rB3					prefB(224+KB*8*4(pB3))#if KB <= 32   #undef prefB   #define prefB(mem)#endif	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB0, 192(pB0)	madd.d	rC00, rC00, ra0, rb0					ldc1	rA0, 192(pA0)	madd.d	rC10, rC10, ra1, rb0					ldc1	rA1, 192(pA1)	madd.d	rC20, rC20, ra2, rb0					ldc1	rA2, 192(pA2)	madd.d	rC30, rC30, ra3, rb0					ldc1	rA3, 192(pA3)	madd.d	rC01, rC01, ra0, rb1					ldc1	rB1, 192(pB1)	madd.d	rC11, rC11, ra1, rb1					ldc1	rB2, 192(pB2)	madd.d	rC21, rC21, ra2, rb1					ldc1	rB3, 192(pB3)	madd.d	rC31, rC31, ra3, rb1					prefB(256+KB*8*4(pB0))	madd.d	rC02, rC02, ra0, rb2					prefB(256+KB*8*4(pB1))	madd.d	rC12, rC12, ra1, rb2					prefB(256+KB*8*4(pB2))	madd.d	rC22, rC22, ra2, rb2					prefB(256+KB*8*4(pB3))#if KB <= 36   #undef prefB   #define prefB(mem)#endif	madd.d	rC32, rC32, ra3, rb2					prefB(288+KB*8*4(pB0))	madd.d	rC03, rC03, ra0, rb3					prefB(288+KB*8*4(pB1))	madd.d	rC13, rC13, ra1, rb3					prefB(288+KB*8*4(pB2))	madd.d	rC23, rC23, ra2, rb3					prefB(288+KB*8*4(pB3))#if KB <= 40   #undef prefB   #define prefB(mem)#endif	madd.d	rC33, rC33, ra3, rb3#endif#if KB > 26					ldc1	rb0, 200(pB0)	madd.d	rC00, rC00, rA0, rB0					ldc1	ra0, 200(pA0)	madd.d	rC10, rC10, rA1, rB0					ldc1	ra1, 200(pA1)	madd.d	rC20, rC20, rA2, rB0					ldc1	ra2, 200(pA2)	madd.d	rC30, rC30, rA3, rB0					ldc1	ra3, 200(pA3)	madd.d	rC01, rC01, rA0, rB1					ldc1	rb1, 200(pB1)	madd.d	rC11, rC11, rA1, rB1					ldc1	rb2, 200(pB2)	madd.d	rC21, rC21, rA2, rB1					ldc1	rb3, 200(pB3)	madd.d	rC31, rC31, rA3, rB1	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2	madd.d	rC22, rC22, rA2, rB2					prefB(320+KB*8*4(pB0))	madd.d	rC32, rC32, rA3, rB2					prefB(320+KB*8*4(pB1))	madd.d	rC03, rC03, rA0, rB3					prefB(320+KB*8*4(pB2))	madd.d	rC13, rC13, rA1, rB3					prefB(320+KB*8*4(pB3))#if KB <= 44 
  #undef prefB   #define prefB(mem)#endif	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB0, 208(pB0)	madd.d	rC00, rC00, ra0, rb0					ldc1	rA0, 208(pA0)	madd.d	rC10, rC10, ra1, rb0					ldc1	rA1, 208(pA1)	madd.d	rC20, rC20, ra2, rb0					ldc1	rA2, 208(pA2)	madd.d	rC30, rC30, ra3, rb0					ldc1	rA3, 208(pA3)	madd.d	rC01, rC01, ra0, rb1					ldc1	rB1, 208(pB1)	madd.d	rC11, rC11, ra1, rb1					ldc1	rB2, 208(pB2)	madd.d	rC21, rC21, ra2, rb1					ldc1	rB3, 208(pB3)	madd.d	rC31, rC31, ra3, rb1					prefC(CMUL(32)(pC0))	madd.d	rC02, rC02, ra0, rb2					prefC(CMUL(32)(pC1))	madd.d	rC12, rC12, ra1, rb2					prefC(CMUL(32)(pC2))	madd.d	rC22, rC22, ra2, rb2					prefC(CMUL(32)(pC3))	madd.d	rC32, rC32, ra3, rb2	madd.d	rC03, rC03, ra0, rb3	madd.d	rC13, rC13, ra1, rb3	madd.d	rC23, rC23, ra2, rb3	madd.d	rC33, rC33, ra3, rb3#endif#if KB > 28					ldc1	rb0, 216(pB0)	madd.d	rC00, rC00, rA0, rB0					ldc1	ra0, 216(pA0)	madd.d	rC10, rC10, rA1, rB0					ldc1	ra1, 216(pA1)	madd.d	rC20, rC20, rA2, rB0					ldc1	ra2, 216(pA2)	madd.d	rC30, rC30, rA3, rB0					ldc1	ra3, 216(pA3)	madd.d	rC01, rC01, rA0, rB1					ldc1	rb1, 216(pB1)	madd.d	rC11, rC11, rA1, rB1					ldc1	rb2, 216(pB2)	madd.d	rC21, rC21, rA2, rB1					ldc1	rb3, 216(pB3)	madd.d	rC31, rC31, rA3, rB1	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2	madd.d	rC22, rC22, rA2, rB2					prefB(352+KB*8*4(pB0))	madd.d	rC32, rC32, rA3, rB2					prefB(352+KB*8*4(pB1))	madd.d	rC03, rC03, rA0, rB3					prefB(352+KB*8*4(pB2))	madd.d	rC13, rC13, rA1, rB3					prefB(352+KB*8*4(pB3))#if KB <= 48   #undef prefB   #define prefB(mem)#endif	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB0, 224(pB0)	madd.d	rC00, rC00, ra0, rb0					ldc1	rA0, 224(pA0)	madd.d	rC10, rC10, ra1, rb0					ldc1	rA1, 224(pA1)	madd.d	rC20, rC20, ra2, rb0					ldc1	rA2, 224(pA2)	madd.d	rC30, rC30, ra3, rb0					ldc1	rA3, 224(pA3)	madd.d	rC01, rC01, ra0, rb1					ldc1	rB1, 224(pB1)	madd.d	rC11, rC11, ra1, rb1					ldc1	rB2, 224(pB2)	madd.d	rC21, rC21, ra2, rb1			
		ldc1	rB3, 224(pB3)	madd.d	rC31, rC31, ra3, rb1	madd.d	rC02, rC02, ra0, rb2	madd.d	rC12, rC12, ra1, rb2	madd.d	rC22, rC22, ra2, rb2					prefB(384+KB*8*4(pB0))	madd.d	rC32, rC32, ra3, rb2					prefB(384+KB*8*4(pB1))	madd.d	rC03, rC03, ra0, rb3					prefB(384+KB*8*4(pB2))	madd.d	rC13, rC13, ra1, rb3					prefB(384+KB*8*4(pB3))#if KB <= 52   #undef prefB   #define prefB(mem)#endif	madd.d	rC23, rC23, ra2, rb3	madd.d	rC33, rC33, ra3, rb3#endif#if KB > 30					ldc1	rb0, 232(pB0)	madd.d	rC00, rC00, rA0, rB0					ldc1	ra0, 232(pA0)	madd.d	rC10, rC10, rA1, rB0					ldc1	ra1, 232(pA1)	madd.d	rC20, rC20, rA2, rB0					ldc1	ra2, 232(pA2)	madd.d	rC30, rC30, rA3, rB0					ldc1	ra3, 232(pA3)	madd.d	rC01, rC01, rA0, rB1					ldc1	rb1, 232(pB1)	madd.d	rC11, rC11, rA1, rB1					ldc1	rb2, 232(pB2)	madd.d	rC21, rC21, rA2, rB1					ldc1	rb3, 232(pB3)	madd.d	rC31, rC31, rA3, rB1	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2	madd.d	rC22, rC22, rA2, rB2	madd.d	rC32, rC32, rA3, rB2	madd.d	rC03, rC03, rA0, rB3	madd.d	rC13, rC13, rA1, rB3	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB0, 240(pB0)	madd.d	rC00, rC00, ra0, rb0					ldc1	rA0, 240(pA0)	madd.d	rC10, rC10, ra1, rb0					ldc1	rA1, 240(pA1)	madd.d	rC20, rC20, ra2, rb0					ldc1	rA2, 240(pA2)	madd.d	rC30, rC30, ra3, rb0					ldc1	rA3, 240(pA3)	madd.d	rC01, rC01, ra0, rb1					ldc1	rB1, 240(pB1)	madd.d	rC11, rC11, ra1, rb1					ldc1	rB2, 240(pB2)	madd.d	rC21, rC21, ra2, rb1					ldc1	rB3, 240(pB3)	madd.d	rC31, rC31, ra3, rb1					prefB(416+KB*8*4(pB0))	madd.d	rC02, rC02, ra0, rb2					prefB(416+KB*8*4(pB1))	madd.d	rC12, rC12, ra1, rb2					prefB(416+KB*8*4(pB2))	madd.d	rC22, rC22, ra2, rb2					prefB(416+KB*8*4(pB3))#if KB <= 56   #undef prefB   #define prefB(mem)#endif	madd.d	rC32, rC32, ra3, rb2					prefB(448+KB*8*4(pB0))	madd.d	rC03, rC03, ra0, rb3					prefB(448+KB*8*4(pB1))	madd.d	rC13, rC13, ra1, rb3					prefB(448+KB*8*4(pB2))	madd.d	rC23, rC23, ra2, rb3					prefB(448+KB*8*4(pB3))	madd.d	rC33, rC33, ra3, rb3#endif#if KB > 
32					ldc1	rb0, 248(pB0)	madd.d	rC00, rC00, rA0, rB0					ldc1	ra0, 248(pA0)	madd.d	rC10, rC10, rA1, rB0					ldc1	ra1, 248(pA1)	madd.d	rC20, rC20, rA2, rB0					ldc1	ra2, 248(pA2)	madd.d	rC30, rC30, rA3, rB0					ldc1	ra3, 248(pA3)	madd.d	rC01, rC01, rA0, rB1					ldc1	rb1, 248(pB1)	madd.d	rC11, rC11, rA1, rB1					ldc1	rb2, 248(pB2)	madd.d	rC21, rC21, rA2, rB1					ldc1	rb3, 248(pB3)	madd.d	rC31, rC31, rA3, rB1	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2	madd.d	rC22, rC22, rA2, rB2	madd.d	rC32, rC32, rA3, rB2	madd.d	rC03, rC03, rA0, rB3	madd.d	rC13, rC13, rA1, rB3	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB0, 256(pB0)	madd.d	rC00, rC00, ra0, rb0					ldc1	rA0, 256(pA0)	madd.d	rC10, rC10, ra1, rb0					ldc1	rA1, 256(pA1)	madd.d	rC20, rC20, ra2, rb0					ldc1	rA2, 256(pA2)	madd.d	rC30, rC30, ra3, rb0					ldc1	rA3, 256(pA3)	madd.d	rC01, rC01, ra0, rb1					ldc1	rB1, 256(pB1)	madd.d	rC11, rC11, ra1, rb1					ldc1	rB2, 256(pB2)	madd.d	rC21, rC21, ra2, rb1					ldc1	rB3, 256(pB3)	madd.d	rC31, rC31, ra3, rb1	madd.d	rC02, rC02, ra0, rb2	madd.d	rC12, rC12, ra1, rb2	madd.d	rC22, rC22, ra2, rb2	madd.d	rC32, rC32, ra3, rb2	madd.d	rC03, rC03, ra0, rb3	madd.d	rC13, rC13, ra1, rb3	madd.d	rC23, rC23, ra2, rb3	madd.d	rC33, rC33, ra3, rb3#endif#if KB > 34					ldc1	rb0, 264(pB0)	madd.d	rC00, rC00, rA0, rB0					ldc1	ra0, 264(pA0)	madd.d	rC10, rC10, rA1, rB0					ldc1	ra1, 264(pA1)	madd.d	rC20, rC20, rA2, rB0					ldc1	ra2, 264(pA2)	madd.d	rC30, rC30, rA3, rB0					ldc1	ra3, 264(pA3)	madd.d	rC01, rC01, rA0, rB1					ldc1	rb1, 264(pB1)	madd.d	rC11, rC11, rA1, rB1					ldc1	rb2, 264(pB2)	madd.d	rC21, rC21, rA2, rB1					ldc1	rb3, 264(pB3)	madd.d	rC31, rC31, rA3, rB1	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2	madd.d	rC22, rC22, rA2, rB2	madd.d	rC32, rC32, rA3, rB2	madd.d	rC03, rC03, rA0, rB3	madd.d	rC13, rC13, rA1, rB3	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB0, 272(pB0)	madd.d	rC00, rC00, ra0, rb0					ldc1	rA0, 272(pA0)	madd.d	
rC10, rC10, ra1, rb0					ldc1	rA1, 272(pA1)	madd.d	rC20, rC20, ra2, rb0					ldc1	rA2, 272(pA2)	madd.d	rC30, rC30, ra3, rb0					ldc1	rA3, 272(pA3)	madd.d	rC01, rC01, ra0, rb1					ldc1	rB1, 272(pB1)	madd.d	rC11, rC11, ra1, rb1					ldc1	rB2, 272(pB2)	madd.d	rC21, rC21, ra2, rb1					ldc1	rB3, 272(pB3)	madd.d	rC31, rC31, ra3, rb1	madd.d	rC02, rC02, ra0, rb2	madd.d	rC12, rC12, ra1, rb2	madd.d	rC22, rC22, ra2, rb2	madd.d	rC32, rC32, ra3, rb2	madd.d	rC03, rC03, ra0, rb3	madd.d	rC13, rC13, ra1, rb3	madd.d	rC23, rC23, ra2, rb3	madd.d	rC33, rC33, ra3, rb3#endif#if KB > 36					ldc1	rb0, 280(pB0)	madd.d	rC00, rC00, rA0, rB0					ldc1	ra0, 280(pA0)	madd.d	rC10, rC10, rA1, rB0					ldc1	ra1, 280(pA1)	madd.d	rC20, rC20, rA2, rB0					ldc1	ra2, 280(pA2)	madd.d	rC30, rC30, rA3, rB0					ldc1	ra3, 280(pA3)	madd.d	rC01, rC01, rA0, rB1					ldc1	rb1, 280(pB1)	madd.d	rC11, rC11, rA1, rB1					ldc1	rb2, 280(pB2)	madd.d	rC21, rC21, rA2, rB1					ldc1	rb3, 280(pB3)	madd.d	rC31, rC31, rA3, rB1	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2	madd.d	rC22, rC22, rA2, rB2	madd.d	rC32, rC32, rA3, rB2	madd.d	rC03, rC03, rA0, rB3	madd.d	rC13, rC13, rA1, rB3	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB0, 288(pB0)	madd.d	rC00, rC00, ra0, rb0					ldc1	rA0, 288(pA0)	madd.d	rC10, rC10, ra1, rb0					ldc1	rA1, 288(pA1)	madd.d	rC20, rC20, ra2, rb0					ldc1	rA2, 288(pA2)	madd.d	rC30, rC30, ra3, rb0					ldc1	rA3, 288(pA3)	madd.d	rC01, rC01, ra0, rb1					ldc1	rB1, 288(pB1)	madd.d	rC11, rC11, ra1, rb1					ldc1	rB2, 288(pB2)	madd.d	rC21, rC21, ra2, rb1					ldc1	rB3, 288(pB3)	madd.d	rC31, rC31, ra3, rb1	madd.d	rC02, rC02, ra0, rb2	madd.d	rC12, rC12, ra1, rb2	madd.d	rC22, rC22, ra2, rb2	madd.d	rC32, rC32, ra3, rb2	madd.d	rC03, rC03, ra0, rb3	madd.d	rC13, rC13, ra1, rb3	madd.d	rC23, rC23, ra2, rb3	madd.d	rC33, rC33, ra3, rb3#endif#if KB > 38					ldc1	rb0, 296(pB0)	madd.d	rC00, rC00, rA0, rB0					ldc1	ra0, 296(pA0)	madd.d	rC10, rC10, rA1, rB0					ldc1	ra1, 296(pA1)	madd.d	rC20, rC20, rA2, 
rB0					ldc1	ra2, 296(pA2)	madd.d	rC30, rC30, rA3, rB0					ldc1	ra3, 296(pA3)	madd.d	rC01, rC01, rA0, rB1					ldc1	rb1, 296(pB1)	madd.d	rC11, rC11, rA1, rB1					ldc1	rb2, 296(pB2)	madd.d	rC21, rC21, rA2, rB1					ldc1	rb3, 296(pB3)	madd.d	rC31, rC31, rA3, rB1	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2	madd.d	rC22, rC22, rA2, rB2	madd.d	rC32, rC32, rA3, rB2	madd.d	rC03, rC03, rA0, rB3	madd.d	rC13, rC13, rA1, rB3	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB0, 304(pB0)	madd.d	rC00, rC00, ra0, rb0					ldc1	rA0, 304(pA0)	madd.d	rC10, rC10, ra1, rb0					ldc1	rA1, 304(pA1)	madd.d	rC20, rC20, ra2, rb0					ldc1	rA2, 304(pA2)	madd.d	rC30, rC30, ra3, rb0					ldc1	rA3, 304(pA3)	madd.d	rC01, rC01, ra0, rb1					ldc1	rB1, 304(pB1)	madd.d	rC11, rC11, ra1, rb1					ldc1	rB2, 304(pB2)	madd.d	rC21, rC21, ra2, rb1					ldc1	rB3, 304(pB3)	madd.d	rC31, rC31, ra3, rb1	madd.d	rC02, rC02, ra0, rb2	madd.d	rC12, rC12, ra1, rb2	madd.d	rC22, rC22, ra2, rb2	madd.d	rC32, rC32, ra3, rb2	madd.d	rC03, rC03, ra0, rb3	madd.d	rC13, rC13, ra1, rb3	madd.d	rC23, rC23, ra2, rb3	madd.d	rC33, rC33, ra3, rb3#endif#if KB > 40					ldc1	rb0, 312(pB0)	madd.d	rC00, rC00, rA0, rB0					ldc1	ra0, 312(pA0)	madd.d	rC10, rC10, rA1, rB0					ldc1	ra1, 312(pA1)	madd.d	rC20, rC20, rA2, rB0					ldc1	ra2, 312(pA2)	madd.d	rC30, rC30, rA3, rB0					ldc1	ra3, 312(pA3)	madd.d	rC01, rC01, rA0, rB1					ldc1	rb1, 312(pB1)	madd.d	rC11, rC11, rA1, rB1					ldc1	rb2, 312(pB2)	madd.d	rC21, rC21, rA2, rB1					ldc1	rb3, 312(pB3)	madd.d	rC31, rC31, rA3, rB1	madd.d	rC02, rC02, rA0, rB2	madd.d	rC12, rC12, rA1, rB2	madd.d	rC22, rC22, rA2, rB2	madd.d	rC32, rC32, rA3, rB2	madd.d	rC03, rC03, rA0, rB3	madd.d	rC13, rC13, rA1, rB3	madd.d	rC23, rC23, rA2, rB3	madd.d	rC33, rC33, rA3, rB3					ldc1	rB0, 320(pB0)	madd.d	rC00, rC00, ra0, rb0					ldc1	rA0, 320(pA0)	madd.d	rC10, rC10, ra1, rb0					ldc1	rA1, 320(pA1)	madd.d	rC20, rC20, ra2, rb0					ldc1	rA2, 320(pA2)	madd.d	rC30, rC30, ra3, rb0					ldc1	rA3, 320(pA3)	madd.d	
rC01, rC01, ra0, rb1					ldc1	rB1, 320(pB1)	madd.d	rC11, rC11, ra1, rb1					ldc1	rB2, 320(pB2)	madd.d	rC21, rC21, ra2, rb1					ldc1	rB3, 320(pB3)	madd.d	rC31, rC31, ra3, rb1	madd.d	rC02, rC02, ra0, rb2	madd.d	rC12, rC12, ra1, rb2	madd.d	rC22, rC22, ra2, rb2	madd.d	rC32, rC32, ra3, rb2	madd.d	rC03, rC03, ra0, rb3	madd.d	rC13, rC13, ra1, rb3	madd.d	rC23, rC23, ra2, rb3	madd.d	rC33, rC33, ra3, rb3#endif

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?