atl_dmm4x4x32_ppc.c

来自「基于 BLAS、CLAPACK 的，用过的人知道是干啥的」· C 语言代码 · 共 2,609 行 · 第 1/5 页

C
2,609
字号
	fmadd	rC23, rA2, rB3, rC23	lfd	rA2, 192+KB2*8(pA0)	fmadd	rC33, rA3, rB3, rC33	lfd	rA3, 192+KB3*8(pA0)#endif#if KB > 25	fmadd	rC00, ra0, rb0, rC00	lfd	rB3, 192+KB3*8(pB0)	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30	lfd	rb0, 200+KB0*8(pB0)	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	lfd	rb1, 200+KB1*8(pB0)	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	lfd	rb2, 200+KB2*8(pB0)	fmadd	rC03, ra0, rb3, rC03	lfd	ra0, 200+KB0*8(pA0)	fmadd	rC13, ra1, rb3, rC13	lfd	ra1, 200+KB1*8(pA0)	fmadd	rC23, ra2, rb3, rC23	lfd	ra2, 200+KB2*8(pA0)	fmadd	rC33, ra3, rb3, rC33	lfd	ra3, 200+KB3*8(pA0)#endif#if KB > 26	fmadd	rC00, rA0, rB0, rC00	lfd	rb3, 200+KB3*8(pB0)	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30	lfd	rB0, 208+KB0*8(pB0)	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	lfd	rB1, 208+KB1*8(pB0)	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	lfd	rB2, 208+KB2*8(pB0)	fmadd	rC03, rA0, rB3, rC03	lfd	rA0, 208+KB0*8(pA0)	fmadd	rC13, rA1, rB3, rC13	lfd	rA1, 208+KB1*8(pA0)	fmadd	rC23, rA2, rB3, rC23	lfd	rA2, 208+KB2*8(pA0)	fmadd	rC33, rA3, rB3, rC33	lfd	rA3, 208+KB3*8(pA0)#endif#if KB > 27	fmadd	rC00, ra0, rb0, rC00	lfd	rB3, 208+KB3*8(pB0)	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30	lfd	rb0, 216+KB0*8(pB0)	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	lfd	rb1, 216+KB1*8(pB0)	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	lfd	rb2, 216+KB2*8(pB0)	fmadd	rC03, ra0, rb3, rC03	lfd	ra0, 216+KB0*8(pA0)	fmadd	rC13, ra1, rb3, rC13	lfd	ra1, 216+KB1*8(pA0)	fmadd	rC23, ra2, rb3, rC23	lfd	ra2, 216+KB2*8(pA0)	fmadd	rC33, ra3, rb3, 
rC33	lfd	ra3, 216+KB3*8(pA0)#endif#if KB > 28	fmadd	rC00, rA0, rB0, rC00	lfd	rb3, 216+KB3*8(pB0)	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30	lfd	rB0, 224+KB0*8(pB0)	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	lfd	rB1, 224+KB1*8(pB0)	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	lfd	rB2, 224+KB2*8(pB0)	fmadd	rC03, rA0, rB3, rC03	lfd	rA0, 224+KB0*8(pA0)	fmadd	rC13, rA1, rB3, rC13	lfd	rA1, 224+KB1*8(pA0)	fmadd	rC23, rA2, rB3, rC23	lfd	rA2, 224+KB2*8(pA0)	fmadd	rC33, rA3, rB3, rC33	lfd	rA3, 224+KB3*8(pA0)#endif#if KB > 29	fmadd	rC00, ra0, rb0, rC00	lfd	rB3, 224+KB3*8(pB0)	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30	lfd	rb0, 232+KB0*8(pB0)	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	lfd	rb1, 232+KB1*8(pB0)	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	lfd	rb2, 232+KB2*8(pB0)	fmadd	rC03, ra0, rb3, rC03	lfd	ra0, 232+KB0*8(pA0)	fmadd	rC13, ra1, rb3, rC13	lfd	ra1, 232+KB1*8(pA0)	fmadd	rC23, ra2, rb3, rC23	lfd	ra2, 232+KB2*8(pA0)	fmadd	rC33, ra3, rb3, rC33	lfd	ra3, 232+KB3*8(pA0)#endif#if KB > 30	fmadd	rC00, rA0, rB0, rC00	lfd	rb3, 232+KB3*8(pB0)	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30	lfd	rB0, 240+KB0*8(pB0)	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	lfd	rB1, 240+KB1*8(pB0)	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	lfd	rB2, 240+KB2*8(pB0)	fmadd	rC03, rA0, rB3, rC03	lfd	rA0, 240+KB0*8(pA0)	fmadd	rC13, rA1, rB3, rC13	lfd	rA1, 240+KB1*8(pA0)	fmadd	rC23, rA2, rB3, rC23	lfd	rA2, 240+KB2*8(pA0)	fmadd	rC33, rA3, rB3, rC33	lfd	rA3, 240+KB3*8(pA0)#endif#if KB > 31	fmadd	rC00, ra0, rb0, rC00	lfd	
rB3, 240+KB3*8(pB0)	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30	lfd	rb0, 248+KB0*8(pB0)	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	lfd	rb1, 248+KB1*8(pB0)	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	lfd	rb2, 248+KB2*8(pB0)	fmadd	rC03, ra0, rb3, rC03	lfd	ra0, 248+KB0*8(pA0)	fmadd	rC13, ra1, rb3, rC13	lfd	ra1, 248+KB1*8(pA0)	fmadd	rC23, ra2, rb3, rC23	lfd	ra2, 248+KB2*8(pA0)	fmadd	rC33, ra3, rb3, rC33	lfd	ra3, 248+KB3*8(pA0)#endif#if KB > 32	fmadd	rC00, rA0, rB0, rC00	lfd	rb3, 248+KB3*8(pB0)	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30	lfd	rB0, 256+KB0*8(pB0)	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	lfd	rB1, 256+KB1*8(pB0)	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	lfd	rB2, 256+KB2*8(pB0)	fmadd	rC03, rA0, rB3, rC03	lfd	rA0, 256+KB0*8(pA0)	fmadd	rC13, rA1, rB3, rC13	lfd	rA1, 256+KB1*8(pA0)	fmadd	rC23, rA2, rB3, rC23	lfd	rA2, 256+KB2*8(pA0)	fmadd	rC33, rA3, rB3, rC33	lfd	rA3, 256+KB3*8(pA0)#endif#if KB > 33	fmadd	rC00, ra0, rb0, rC00	lfd	rB3, 256+KB3*8(pB0)	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30	lfd	rb0, 264+KB0*8(pB0)	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	lfd	rb1, 264+KB1*8(pB0)	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	lfd	rb2, 264+KB2*8(pB0)	fmadd	rC03, ra0, rb3, rC03	lfd	ra0, 264+KB0*8(pA0)	fmadd	rC13, ra1, rb3, rC13	lfd	ra1, 264+KB1*8(pA0)	fmadd	rC23, ra2, rb3, rC23	lfd	ra2, 264+KB2*8(pA0)	fmadd	rC33, ra3, rb3, rC33	lfd	ra3, 264+KB3*8(pA0)#endif#if KB > 34	fmadd	rC00, rA0, rB0, rC00	lfd	rb3, 264+KB3*8(pB0)	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	
fmadd	rC30, rA3, rB0, rC30	lfd	rB0, 272+KB0*8(pB0)	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	lfd	rB1, 272+KB1*8(pB0)	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	lfd	rB2, 272+KB2*8(pB0)	fmadd	rC03, rA0, rB3, rC03	lfd	rA0, 272+KB0*8(pA0)	fmadd	rC13, rA1, rB3, rC13	lfd	rA1, 272+KB1*8(pA0)	fmadd	rC23, rA2, rB3, rC23	lfd	rA2, 272+KB2*8(pA0)	fmadd	rC33, rA3, rB3, rC33	lfd	rA3, 272+KB3*8(pA0)#endif#if KB > 35	fmadd	rC00, ra0, rb0, rC00	lfd	rB3, 272+KB3*8(pB0)	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30	lfd	rb0, 280+KB0*8(pB0)	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	lfd	rb1, 280+KB1*8(pB0)	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	lfd	rb2, 280+KB2*8(pB0)	fmadd	rC03, ra0, rb3, rC03	lfd	ra0, 280+KB0*8(pA0)	fmadd	rC13, ra1, rb3, rC13	lfd	ra1, 280+KB1*8(pA0)	fmadd	rC23, ra2, rb3, rC23	lfd	ra2, 280+KB2*8(pA0)	fmadd	rC33, ra3, rb3, rC33	lfd	ra3, 280+KB3*8(pA0)#endif#if KB > 36	fmadd	rC00, rA0, rB0, rC00	lfd	rb3, 280+KB3*8(pB0)	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30	lfd	rB0, 288+KB0*8(pB0)	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	lfd	rB1, 288+KB1*8(pB0)	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	lfd	rB2, 288+KB2*8(pB0)	fmadd	rC03, rA0, rB3, rC03	lfd	rA0, 288+KB0*8(pA0)	fmadd	rC13, rA1, rB3, rC13	lfd	rA1, 288+KB1*8(pA0)	fmadd	rC23, rA2, rB3, rC23	lfd	rA2, 288+KB2*8(pA0)	fmadd	rC33, rA3, rB3, rC33	lfd	rA3, 288+KB3*8(pA0)#endif#if KB > 37	fmadd	rC00, ra0, rb0, rC00	lfd	rB3, 288+KB3*8(pB0)	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30	lfd	rb0, 296+KB0*8(pB0)	fmadd	rC01, ra0, rb1, 
rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	lfd	rb1, 296+KB1*8(pB0)	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	lfd	rb2, 296+KB2*8(pB0)	fmadd	rC03, ra0, rb3, rC03	lfd	ra0, 296+KB0*8(pA0)	fmadd	rC13, ra1, rb3, rC13	lfd	ra1, 296+KB1*8(pA0)	fmadd	rC23, ra2, rb3, rC23	lfd	ra2, 296+KB2*8(pA0)	fmadd	rC33, ra3, rb3, rC33	lfd	ra3, 296+KB3*8(pA0)#endif#if KB > 38	fmadd	rC00, rA0, rB0, rC00	lfd	rb3, 296+KB3*8(pB0)	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30	lfd	rB0, 304+KB0*8(pB0)	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	lfd	rB1, 304+KB1*8(pB0)	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	lfd	rB2, 304+KB2*8(pB0)	fmadd	rC03, rA0, rB3, rC03	lfd	rA0, 304+KB0*8(pA0)	fmadd	rC13, rA1, rB3, rC13	lfd	rA1, 304+KB1*8(pA0)	fmadd	rC23, rA2, rB3, rC23	lfd	rA2, 304+KB2*8(pA0)	fmadd	rC33, rA3, rB3, rC33	lfd	rA3, 304+KB3*8(pA0)#endif#if KB > 39	fmadd	rC00, ra0, rb0, rC00	lfd	rB3, 304+KB3*8(pB0)	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30	lfd	rb0, 312+KB0*8(pB0)	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	lfd	rb1, 312+KB1*8(pB0)	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	lfd	rb2, 312+KB2*8(pB0)	fmadd	rC03, ra0, rb3, rC03	lfd	ra0, 312+KB0*8(pA0)	fmadd	rC13, ra1, rb3, rC13	lfd	ra1, 312+KB1*8(pA0)	fmadd	rC23, ra2, rb3, rC23	lfd	ra2, 312+KB2*8(pA0)	fmadd	rC33, ra3, rb3, rC33	lfd	ra3, 312+KB3*8(pA0)#endif#if KB > 40	fmadd	rC00, rA0, rB0, rC00	lfd	rb3, 312+KB3*8(pB0)	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30	lfd	rB0, 320+KB0*8(pB0)	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, 
rB1, rC31	lfd	rB1, 320+KB1*8(pB0)	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	lfd	rB2, 320+KB2*8(pB0)	fmadd	rC03, rA0, rB3, rC03	lfd	rA0, 320+KB0*8(pA0)	fmadd	rC13, rA1, rB3, rC13	lfd	rA1, 320+KB1*8(pA0)	fmadd	rC23, rA2, rB3, rC23	lfd	rA2, 320+KB2*8(pA0)	fmadd	rC33, rA3, rB3, rC33	lfd	rA3, 320+KB3*8(pA0)#endif#if KB > 41	fmadd	rC00, ra0, rb0, rC00	lfd	rB3, 320+KB3*8(pB0)	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30	lfd	rb0, 328+KB0*8(pB0)	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	lfd	rb1, 328+KB1*8(pB0)	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	lfd	rb2, 328+KB2*8(pB0)	fmadd	rC03, ra0, rb3, rC03	lfd	ra0, 328+KB0*8(pA0)	fmadd	rC13, ra1, rb3, rC13	lfd	ra1, 328+KB1*8(pA0)	fmadd	rC23, ra2, rb3, rC23	lfd	ra2, 328+KB2*8(pA0)	fmadd	rC33, ra3, rb3, rC33	lfd	ra3, 328+KB3*8(pA0)#endif#if KB > 42	fmadd	rC00, rA0, rB0, rC00	lfd	rb3, 328+KB3*8(pB0)	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30	lfd	rB0, 336+KB0*8(pB0)	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	lfd	rB1, 336+KB1*8(pB0)	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	lfd	rB2, 336+KB2*8(pB0)	fmadd	rC03, rA0, rB3, rC03	lfd	rA0, 336+KB0*8(pA0)	fmadd	rC13, rA1, rB3, rC13	lfd	rA1, 336+KB1*8(pA0)	fmadd	rC23, rA2, rB3, rC23	lfd	rA2, 336+KB2*8(pA0)	fmadd	rC33, rA3, rB3, rC33	lfd	rA3, 336+KB3*8(pA0)#endif#if KB > 43	fmadd	rC00, ra0, rb0, rC00	lfd	rB3, 336+KB3*8(pB0)	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30	lfd	rb0, 344+KB0*8(pB0)	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	lfd	rb1, 344+KB1*8(pB0)	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, 
ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	lfd	rb2, 344+KB2*8(pB0)	fmadd	rC03, ra0, rb3, rC03	lfd	ra0, 344+KB0*8(pA0)	fmadd	rC13, ra1, rb3, rC13	lfd	ra1, 344+KB1*8(pA0)	fmadd	rC23, ra2, rb3, rC23	lfd	ra2, 344+KB2*8(pA0)	fmadd	rC33, ra3, rb3, rC33	lfd	ra3, 344+KB3*8(pA0)#endif#if KB > 44	fmadd	rC00, rA0, rB0, rC00	lfd	rb3, 344+KB3*8(pB0)

⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?