atl_dmm4x4x32_ppc.c
来自「基于 BLAS、CLAPACK 的，用过的人知道是干啥的」· C语言 代码 · 共 2,609 行 · 第 1/5 页
C
2,609 行
fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 40+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 40+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 40+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 40+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 40+KB3*8(pA0)#endif#if KB > 6 fmadd rC00, rA0, rB0, rC00 lfd rb3, 40+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 48+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 48+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 48+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 48+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 48+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 48+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 48+KB3*8(pA0)#endif#if KB > 7 fmadd rC00, ra0, rb0, rC00 lfd rB3, 48+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 56+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 56+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 56+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 56+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 56+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 56+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 56+KB3*8(pA0)#endif#if KB > 8 fmadd rC00, rA0, rB0, rC00 lfd rb3, 56+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 64+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 64+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, 
rC32 lfd rB2, 64+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 64+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 64+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 64+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 64+KB3*8(pA0)#endif#if KB > 9 fmadd rC00, ra0, rb0, rC00 lfd rB3, 64+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 72+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 72+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 72+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 72+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 72+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 72+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 72+KB3*8(pA0)#endif#if KB > 10 fmadd rC00, rA0, rB0, rC00 lfd rb3, 72+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 80+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 80+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 80+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 80+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 80+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 80+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 80+KB3*8(pA0)#endif#if KB > 11 fmadd rC00, ra0, rb0, rC00 lfd rB3, 80+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 88+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 88+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 88+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 88+KB0*8(pA0) fmadd rC13, ra1, rb3, 
rC13 lfd ra1, 88+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 88+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 88+KB3*8(pA0)#endif#if KB > 12 fmadd rC00, rA0, rB0, rC00 lfd rb3, 88+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 96+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 96+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 96+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 96+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 96+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 96+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 96+KB3*8(pA0)#endif#if KB > 13 fmadd rC00, ra0, rb0, rC00 lfd rB3, 96+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 104+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 104+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 104+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 104+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 104+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 104+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 104+KB3*8(pA0)#endif#if KB > 14 fmadd rC00, rA0, rB0, rC00 lfd rb3, 104+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 112+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 112+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 112+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 112+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 112+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 112+KB2*8(pA0) fmadd 
rC33, rA3, rB3, rC33 lfd rA3, 112+KB3*8(pA0)#endif#if KB > 15 fmadd rC00, ra0, rb0, rC00 lfd rB3, 112+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 120+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 120+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 120+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 120+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 120+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 120+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 120+KB3*8(pA0)#endif#if KB > 16 fmadd rC00, rA0, rB0, rC00 lfd rb3, 120+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 128+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 128+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 128+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 128+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 128+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 128+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 128+KB3*8(pA0)#endif#if KB > 17 fmadd rC00, ra0, rb0, rC00 lfd rB3, 128+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 136+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 136+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 136+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 136+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 136+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 136+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 136+KB3*8(pA0)#endif#if KB > 18 fmadd rC00, 
rA0, rB0, rC00 lfd rb3, 136+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 144+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 144+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 144+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 144+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 144+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 144+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 144+KB3*8(pA0)#endif#if KB > 19 fmadd rC00, ra0, rb0, rC00 lfd rB3, 144+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 152+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 152+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 152+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 152+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 152+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 152+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 152+KB3*8(pA0)#endif#if KB > 20 fmadd rC00, rA0, rB0, rC00 lfd rb3, 152+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 160+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 160+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 160+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 160+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 160+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 160+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 160+KB3*8(pA0)#endif#if KB > 21 fmadd rC00, ra0, rb0, rC00 lfd rB3, 160+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd 
rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 168+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 168+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 168+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 168+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 168+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 168+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 168+KB3*8(pA0)#endif#if KB > 22 fmadd rC00, rA0, rB0, rC00 lfd rb3, 168+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 176+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 176+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 176+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 176+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 176+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 176+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 176+KB3*8(pA0)#endif#if KB > 23 fmadd rC00, ra0, rb0, rC00 lfd rB3, 176+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 184+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 184+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 184+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 184+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 184+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 184+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 184+KB3*8(pA0)#endif#if KB > 24 fmadd rC00, rA0, rB0, rC00 lfd rb3, 184+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 192+KB0*8(pB0) 
fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 192+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 192+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 192+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 192+KB1*8(pA0)
⌨️ 快捷键说明
复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?