atl_dmm4x4x80_ppc.c
来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 2,991 行 · 第 1/5 页
C
2,991 行
fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 #if KB > 4 lfd rA3, KB3*8+32(pA0) #endif #if KB > 4 lfd rB1, KB*8+32(pB0) #endif #if KB > 4 lfd rB2, KB2*8+32(pB0) #endif #if KB > 4 lfd rB3, KB3*8+32(pB0) #endif fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 fmadd rC03, ra0, rb3, rC03 fmadd rC13, ra1, rb3, rC13 fmadd rC23, ra2, rb3, rC23 fmadd rC33, ra3, rb3, rC33#endif /* end K=4 block */#if KB > 4 #if KB > 5 lfd rb0, 40(pB0) #endif #if KB > 5 lfd ra0, 40(pA0) #endif #if KB > 5 lfd ra1, KB*8+40(pA0) #endif #if KB > 5 lfd ra2, KB2*8+40(pA0) #endif fmadd rC00, rA0, rB0, rC00 fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 #if KB > 5 lfd ra3, KB3*8+40(pA0) #endif #if KB > 5 lfd rb1, KB*8+40(pB0) #endif #if KB > 5 lfd rb2, KB2*8+40(pB0) #endif #if KB > 5 lfd rb3, KB3*8+40(pB0) #endif fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 fmadd rC03, rA0, rB3, rC03 fmadd rC13, rA1, rB3, rC13 fmadd rC23, rA2, rB3, rC23 fmadd rC33, rA3, rB3, rC33#endif /* end K=5 block */#if KB > 5 #if KB > 6 lfd rB0, 48(pB0) #endif #if KB > 6 lfd rA0, 48(pA0) #endif #if KB > 6 lfd rA1, KB*8+48(pA0) #endif #if KB > 6 lfd rA2, KB2*8+48(pA0) #endif fmadd rC00, ra0, rb0, rC00 fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 #if KB > 6 lfd rA3, KB3*8+48(pA0) #endif #if KB > 6 lfd rB1, KB*8+48(pB0) #endif #if KB > 6 lfd rB2, KB2*8+48(pB0) #endif #if KB > 6 lfd rB3, KB3*8+48(pB0) #endif fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 fmadd rC03, ra0, rb3, rC03 fmadd rC13, ra1, rb3, rC13 fmadd rC23, ra2, rb3, rC23 fmadd rC33, ra3, rb3, rC33#endif /* end K=6 block */#if KB > 6 #if KB > 7 lfd rb0, 56(pB0) #endif #if KB > 7 lfd ra0, 56(pA0) #endif #if KB > 7 lfd ra1, KB*8+56(pA0) #endif #if KB > 7 lfd ra2, KB2*8+56(pA0) #endif fmadd rC00, rA0, rB0, rC00 fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 #if KB > 7 lfd ra3, KB3*8+56(pA0) #endif #if KB > 7 lfd rb1, KB*8+56(pB0) #endif #if KB > 7 lfd rb2, KB2*8+56(pB0) #endif #if KB > 7 lfd rb3, KB3*8+56(pB0) #endif fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 fmadd rC03, rA0, rB3, rC03 fmadd rC13, rA1, rB3, rC13 fmadd rC23, rA2, rB3, rC23 fmadd rC33, rA3, rB3, rC33#endif /* end K=7 block */#if KB > 7 #if KB > 8 lfd rB0, 64(pB0) #endif #if KB > 8 lfd rA0, 64(pA0) #endif #if KB > 8 lfd rA1, KB*8+64(pA0) #endif #if KB > 8 lfd rA2, KB2*8+64(pA0) #endif fmadd rC00, ra0, rb0, rC00 fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 #if KB > 8 lfd rA3, KB3*8+64(pA0) #endif #if KB > 8 lfd rB1, KB*8+64(pB0) #endif #if KB > 8 lfd rB2, KB2*8+64(pB0) #endif #if KB > 8 lfd rB3, KB3*8+64(pB0) #endif fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 fmadd rC03, ra0, rb3, rC03 fmadd rC13, ra1, rb3, rC13 fmadd rC23, ra2, rb3, rC23 fmadd rC33, ra3, rb3, rC33#endif /* end K=8 block */#if KB > 8 #if KB > 9 lfd rb0, 72(pB0) #endif #if KB > 9 lfd ra0, 72(pA0) #endif #if KB > 9 lfd ra1, KB*8+72(pA0) #endif #if KB > 9 lfd ra2, KB2*8+72(pA0) #endif fmadd rC00, rA0, rB0, rC00 fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 #if KB > 9 lfd ra3, KB3*8+72(pA0) #endif #if KB > 9 lfd rb1, KB*8+72(pB0) #endif #if KB > 9 lfd rb2, KB2*8+72(pB0) #endif #if KB > 9 lfd rb3, KB3*8+72(pB0) #endif fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 fmadd rC03, rA0, rB3, rC03 fmadd rC13, rA1, rB3, rC13 fmadd rC23, rA2, rB3, rC23 fmadd rC33, rA3, rB3, rC33#endif /* end K=9 block */#if KB > 9 #if KB > 10 lfd rB0, 80(pB0) #endif #if KB > 10 lfd rA0, 80(pA0) #endif #if KB > 10 lfd rA1, KB*8+80(pA0) #endif #if KB > 10 lfd rA2, KB2*8+80(pA0) #endif fmadd rC00, ra0, rb0, rC00 fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 #if KB > 10 lfd rA3, KB3*8+80(pA0) #endif #if KB > 10 lfd rB1, KB*8+80(pB0) #endif #if KB > 10 lfd rB2, KB2*8+80(pB0) #endif #if KB > 10 lfd rB3, KB3*8+80(pB0) #endif fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 fmadd rC03, ra0, rb3, rC03 fmadd rC13, ra1, rb3, rC13 fmadd rC23, ra2, rb3, rC23 fmadd rC33, ra3, rb3, rC33#endif /* end K=10 block */#if KB > 10 #if KB > 11 lfd rb0, 88(pB0) #endif #if KB > 11 lfd ra0, 88(pA0) #endif #if KB > 11 lfd ra1, KB*8+88(pA0) #endif #if KB > 11 lfd ra2, KB2*8+88(pA0) #endif fmadd rC00, rA0, rB0, rC00 fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 #if KB > 11 lfd ra3, KB3*8+88(pA0) #endif #if KB > 11 lfd rb1, KB*8+88(pB0) #endif #if KB > 11 lfd rb2, KB2*8+88(pB0) #endif #if KB > 11 lfd rb3, KB3*8+88(pB0) #endif fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 fmadd rC03, rA0, rB3, rC03 fmadd rC13, rA1, rB3, rC13 fmadd rC23, rA2, rB3, rC23 fmadd rC33, rA3, rB3, rC33#endif /* end K=11 block */#if KB > 11 #if KB > 12 lfd rB0, 96(pB0) #endif #if KB > 12 lfd rA0, 96(pA0) #endif #if KB > 12 lfd rA1, KB*8+96(pA0) #endif #if KB > 12 lfd rA2, KB2*8+96(pA0) #endif fmadd rC00, ra0, rb0, rC00 fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 #if KB > 12 lfd rA3, KB3*8+96(pA0) #endif #if KB > 12 lfd rB1, KB*8+96(pB0) #endif #if KB > 12 lfd rB2, KB2*8+96(pB0) #endif #if KB > 12 lfd rB3, KB3*8+96(pB0) #endif fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 fmadd rC03, ra0, rb3, rC03 fmadd rC13, ra1, rb3, rC13 fmadd rC23, ra2, rb3, rC23 fmadd rC33, ra3, rb3, rC33#endif /* end K=12 block */#if KB > 12 #if KB > 13 lfd rb0, 104(pB0) #endif #if KB > 13 lfd ra0, 104(pA0) #endif #if KB > 13 lfd ra1, KB*8+104(pA0) #endif #if KB > 13 lfd ra2, KB2*8+104(pA0) #endif fmadd rC00, rA0, rB0, rC00 fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 #if KB > 13 lfd ra3, KB3*8+104(pA0) #endif #if KB > 13 lfd rb1, KB*8+104(pB0) #endif #if KB > 13 lfd rb2, KB2*8+104(pB0) #endif #if KB > 13 lfd rb3, KB3*8+104(pB0) #endif fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 fmadd rC03, rA0, rB3, rC03 fmadd rC13, rA1, rB3, rC13 fmadd rC23, rA2, rB3, rC23 fmadd rC33, rA3, rB3, rC33#endif /* end K=13 block */#if KB > 13 #if KB > 14 lfd rB0, 112(pB0) #endif #if KB > 14 lfd rA0, 112(pA0) #endif #if KB > 14 lfd rA1, KB*8+112(pA0) #endif #if KB > 14 lfd rA2, KB2*8+112(pA0) #endif fmadd rC00, ra0, rb0, rC00 fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 #if KB > 14 lfd rA3, KB3*8+112(pA0) #endif #if KB > 14 lfd rB1, KB*8+112(pB0) #endif #if KB > 14 lfd rB2, KB2*8+112(pB0) #endif #if KB > 14 lfd rB3, KB3*8+112(pB0) #endif fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 fmadd rC03, ra0, rb3, rC03 fmadd rC13, ra1, rb3, rC13 fmadd rC23, ra2, rb3, rC23 fmadd rC33, ra3, rb3, rC33#endif /* end K=14 block */#if KB > 14 #if KB > 15 lfd rb0, 120(pB0) #endif #if KB > 15 lfd ra0, 120(pA0) #endif #if KB > 15 lfd ra1, KB*8+120(pA0) #endif #if KB > 15 lfd ra2, KB2*8+120(pA0) #endif fmadd rC00, rA0, rB0, rC00 fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 #if KB > 15 lfd ra3, KB3*8+120(pA0) #endif #if KB > 15 lfd rb1, KB*8+120(pB0) #endif #if KB > 15 lfd rb2, KB2*8+120(pB0) #endif #if KB > 15 lfd rb3, KB3*8+120(pB0) #endif fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 fmadd rC03, rA0, rB3, rC03 fmadd rC13, rA1, rB3, rC13 fmadd rC23, rA2, rB3, rC23 fmadd rC33, rA3, rB3, rC33#endif /* end K=15 block */#if KB > 15 #if KB > 16 lfd rB0, 128(pB0) #endif #if KB > 16 lfd rA0, 128(pA0) #endif #if KB > 16 lfd rA1, KB*8+128(pA0) #endif #if KB > 16 lfd rA2, KB2*8+128(pA0) #endif fmadd rC00, ra0, rb0, rC00 fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 #if KB > 16 lfd rA3, KB3*8+128(pA0) #endif #if KB > 16 lfd rB1, KB*8+128(pB0) #endif #if KB > 16 lfd rB2, KB2*8+128(pB0) #endif #if KB > 16 lfd rB3, KB3*8+128(pB0) #endif fmadd rC01, ra0, rb1, rC01 fmadd rC11, ra1, rb1, rC11 fmadd rC21, ra2, rb1, rC21 fmadd rC31, ra3, rb1, rC31 fmadd rC02, ra0, rb2, rC02 fmadd rC12, ra1, rb2, rC12 fmadd rC22, ra2, rb2, rC22 fmadd rC32, ra3, rb2, rC32 fmadd rC03, ra0, rb3, rC03 fmadd rC13, ra1, rb3, rC13 fmadd rC23, ra2, rb3, rC23 fmadd rC33, ra3, rb3, rC33#endif /* end K=16 block */#if KB > 16 #if KB > 17 lfd rb0, 136(pB0) #endif #if KB > 17 lfd ra0, 136(pA0) #endif #if KB > 17 lfd ra1, KB*8+136(pA0) #endif #if KB > 17 lfd ra2, KB2*8+136(pA0) #endif fmadd rC00, rA0, rB0, rC00 fmadd rC10, rA1, rB0, rC10 fmadd rC20, rA2, rB0, rC20 fmadd rC30, rA3, rB0, rC30 #if KB > 17 lfd ra3, KB3*8+136(pA0) #endif #if KB > 17 lfd rb1, KB*8+136(pB0) #endif #if KB > 17 lfd rb2, KB2*8+136(pB0) #endif #if KB > 17 lfd rb3, KB3*8+136(pB0) #endif fmadd rC01, rA0, rB1, rC01 fmadd rC11, rA1, rB1, rC11 fmadd rC21, rA2, rB1, rC21 fmadd rC31, rA3, rB1, rC31 fmadd rC02, rA0, rB2, rC02 fmadd rC12, rA1, rB2, rC12 fmadd rC22, rA2, rB2, rC22 fmadd rC32, rA3, rB2, rC32 fmadd rC03, rA0, rB3, rC03 fmadd rC13, rA1, rB3, rC13 fmadd rC23, rA2, rB3, rC23 fmadd rC33, rA3, rB3, rC33#endif /* end K=17 block */#if KB > 17 #if KB > 18 lfd rB0, 144(pB0) #endif #if KB > 18 lfd rA0, 144(pA0) #endif #if KB > 18 lfd rA1, KB*8+144(pA0) #endif #if KB > 18 lfd rA2, KB2*8+144(pA0) #endif fmadd rC00, ra0, rb0, rC00 fmadd rC10, ra1, rb0, rC10 fmadd rC20, ra2, rb0, rC20 fmadd rC30, ra3, rb0, rC30 #if KB > 18 lfd rA3, KB3*8+144(pA0) #endif #if KB > 18 lfd rB1, KB*8+144(pB0) #endif #if KB > 18 lfd rB2, KB2*8+144(pB0) #endif
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?