atl_dmm4x4x32_ppc.c
来自「基于 BLAS、CLAPACK 的库,用过的人知道是干啥的」· C语言 代码 · 共 2,609 行 · 第 1/5 页
C
2,609 行
fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 352+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 352+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 352+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 352+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 352+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 352+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 352+KB3*8(pA0)#endif#if KB > 45 fmadd rC00, ra0, rb0, rC00 lfd rB3, 352+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 360+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 360+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 360+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 360+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 360+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 360+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 360+KB3*8(pA0)#endif#if KB > 46 fmadd rC00, rA0, rB0, rC00 lfd rb3, 360+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 368+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 368+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 368+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 368+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 368+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 368+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 368+KB3*8(pA0)#endif#if KB > 47 fmadd rC00, ra0, rb0, rC00 lfd rB3, 368+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, 
rC30 lfd rb0, 376+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 376+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 376+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 376+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 376+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 376+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 376+KB3*8(pA0)#endif#if KB > 48 fmadd rC00, rA0, rB0, rC00 lfd rb3, 376+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 384+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 384+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 384+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 384+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 384+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 384+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 384+KB3*8(pA0)#endif#if KB > 49 fmadd rC00, ra0, rb0, rC00 lfd rB3, 384+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 392+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 392+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 392+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 392+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 392+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 392+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 392+KB3*8(pA0)#endif#if KB > 50 fmadd rC00, rA0, rB0, rC00 lfd rb3, 392+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 400+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, 
rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 400+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 400+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 400+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 400+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 400+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 400+KB3*8(pA0)#endif#if KB > 51 fmadd rC00, ra0, rb0, rC00 lfd rB3, 400+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 408+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 408+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 408+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 408+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 408+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 408+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 408+KB3*8(pA0)#endif#if KB > 52 fmadd rC00, rA0, rB0, rC00 lfd rb3, 408+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 416+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 416+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 416+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 416+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 416+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 416+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 416+KB3*8(pA0)#endif#if KB > 53 fmadd rC00, ra0, rb0, rC00 lfd rB3, 416+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 424+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 
424+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 424+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 424+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 424+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 424+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 424+KB3*8(pA0)#endif#if KB > 54 fmadd rC00, rA0, rB0, rC00 lfd rb3, 424+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 432+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 432+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 432+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 432+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 432+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 432+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 432+KB3*8(pA0)#endif#if KB > 55 fmadd rC00, ra0, rb0, rC00 lfd rB3, 432+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 440+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 440+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 440+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 440+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 440+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 440+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 440+KB3*8(pA0)#endif#if KB > 56 fmadd rC00, rA0, rB0, rC00 lfd rb3, 440+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 448+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 448+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd 
rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 448+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 448+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 448+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 448+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 448+KB3*8(pA0)#endif#if KB > 57 fmadd rC00, ra0, rb0, rC00 lfd rB3, 448+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 456+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 456+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 456+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 456+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 456+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 456+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 456+KB3*8(pA0)#endif#if KB > 58 fmadd rC00, rA0, rB0, rC00 lfd rb3, 456+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 464+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 464+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 464+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 464+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 464+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 464+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 464+KB3*8(pA0)#endif#if KB > 59 fmadd rC00, ra0, rb0, rC00 lfd rB3, 464+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 472+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 472+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 472+KB2*8(pB0) 
fmadd rC03, ra0, rb3, rC03 lfd ra0, 472+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 472+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 472+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 472+KB3*8(pA0)#endif#if KB > 60 fmadd rC00, rA0, rB0, rC00 lfd rb3, 472+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 480+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 480+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 480+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 480+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 480+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 480+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 480+KB3*8(pA0)#endif#if KB > 61 fmadd rC00, ra0, rb0, rC00 lfd rB3, 480+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 488+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 488+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 488+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 488+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 488+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 488+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 488+KB3*8(pA0)#endif#if KB > 62 fmadd rC00, rA0, rB0, rC00 lfd rb3, 488+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 496+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 496+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 496+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 496+KB0*8(pA0) fmadd rC13, rA1, rB3, 
rC13 lfd rA1, 496+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 496+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 496+KB3*8(pA0)#endif#if KB > 63 fmadd rC00, ra0, rb0, rC00 lfd rB3, 496+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 504+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 504+KB1*8(pB0)
⌨️ 快捷键说明
复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?