atl_dmm4x4x32_ppc.c
来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 2,609 行 · 第 1/5 页
C
2,609 行
fmadd rC23, rA2, rB3, rC23 lfd rA2, 192+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 192+KB3*8(pA0)#endif#if KB > 25 fmadd rC00, ra0, rb0, rC00 lfd rB3, 192+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 200+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 200+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 200+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 200+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 200+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 200+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 200+KB3*8(pA0)#endif#if KB > 26 fmadd rC00, rA0, rB0, rC00 lfd rb3, 200+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 208+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 208+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 208+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 208+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 208+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 208+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 208+KB3*8(pA0)#endif#if KB > 27 fmadd rC00, ra0, rb0, rC00 lfd rB3, 208+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 216+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 216+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 216+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 216+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 216+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 216+KB2*8(pA0) fmadd rC33, ra3, rb3, 
rC33 lfd ra3, 216+KB3*8(pA0)#endif#if KB > 28 fmadd rC00, rA0, rB0, rC00 lfd rb3, 216+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 224+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 224+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 224+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 224+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 224+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 224+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 224+KB3*8(pA0)#endif#if KB > 29 fmadd rC00, ra0, rb0, rC00 lfd rB3, 224+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 232+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 232+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 232+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 232+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 232+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 232+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 232+KB3*8(pA0)#endif#if KB > 30 fmadd rC00, rA0, rB0, rC00 lfd rb3, 232+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 240+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 240+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 240+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 240+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 240+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 240+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 240+KB3*8(pA0)#endif#if KB > 31 fmadd rC00, ra0, rb0, rC00 lfd 
rB3, 240+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 248+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 248+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 248+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 248+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 248+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 248+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 248+KB3*8(pA0)#endif#if KB > 32 fmadd rC00, rA0, rB0, rC00 lfd rb3, 248+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 256+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 256+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 256+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 256+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 256+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 256+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 256+KB3*8(pA0)#endif#if KB > 33 fmadd rC00, ra0, rb0, rC00 lfd rB3, 256+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 264+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 264+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 264+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 264+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 264+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 264+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 264+KB3*8(pA0)#endif#if KB > 34 fmadd rC00, rA0, rB0, rC00 lfd rb3, 264+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 
fmadd rC30, rA3, rB0, rC30 lfd rB0, 272+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 272+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 272+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 272+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 272+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 272+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 272+KB3*8(pA0)#endif#if KB > 35 fmadd rC00, ra0, rb0, rC00 lfd rB3, 272+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 280+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 280+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 280+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 280+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 280+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 280+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 280+KB3*8(pA0)#endif#if KB > 36 fmadd rC00, rA0, rB0, rC00 lfd rb3, 280+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 288+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 288+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 288+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 288+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 288+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 288+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 288+KB3*8(pA0)#endif#if KB > 37 fmadd rC00, ra0, rb0, rC00 lfd rB3, 288+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 296+KB0*8(pB0) fmadd rC01, ra0, rb1, 
rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 296+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 296+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 296+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 296+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 296+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 296+KB3*8(pA0)#endif#if KB > 38 fmadd rC00, rA0, rB0, rC00 lfd rb3, 296+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 304+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 304+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 304+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 304+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 304+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 304+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 304+KB3*8(pA0)#endif#if KB > 39 fmadd rC00, ra0, rb0, rC00 lfd rB3, 304+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 312+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 312+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 312+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 312+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 312+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 312+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 312+KB3*8(pA0)#endif#if KB > 40 fmadd rC00, rA0, rB0, rC00 lfd rb3, 312+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 320+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, 
rB1, rC31 lfd rB1, 320+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 320+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 320+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 320+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 320+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 320+KB3*8(pA0)#endif#if KB > 41 fmadd rC00, ra0, rb0, rC00 lfd rB3, 320+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 328+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 328+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 328+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 328+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 328+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 328+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 328+KB3*8(pA0)#endif#if KB > 42 fmadd rC00, rA0, rB0, rC00 lfd rb3, 328+KB3*8(pB0) fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 lfd rB0, 336+KB0*8(pB0) fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 lfd rB1, 336+KB1*8(pB0) fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 lfd rB2, 336+KB2*8(pB0) fmadd rC03, rA0, rB3, rC03 lfd rA0, 336+KB0*8(pA0) fmadd rC13, rA1, rB3, rC13 lfd rA1, 336+KB1*8(pA0) fmadd rC23, rA2, rB3, rC23 lfd rA2, 336+KB2*8(pA0) fmadd rC33, rA3, rB3, rC33 lfd rA3, 336+KB3*8(pA0)#endif#if KB > 43 fmadd rC00, ra0, rb0, rC00 lfd rB3, 336+KB3*8(pB0) fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 lfd rb0, 344+KB0*8(pB0) fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 lfd rb1, 344+KB1*8(pB0) fmadd rC02, ra0, rb2, rC02 fmadd rC12, 
ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 lfd rb2, 344+KB2*8(pB0) fmadd rC03, ra0, rb3, rC03 lfd ra0, 344+KB0*8(pA0) fmadd rC13, ra1, rb3, rC13 lfd ra1, 344+KB1*8(pA0) fmadd rC23, ra2, rb3, rC23 lfd ra2, 344+KB2*8(pA0) fmadd rC33, ra3, rb3, rC33 lfd ra3, 344+KB3*8(pA0)#endif#if KB > 44 fmadd rC00, rA0, rB0, rC00 lfd rb3, 344+KB3*8(pB0)
⌨️ 快捷键说明
复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?