atl_dmm4x4x80_ppc.c

来自「基于 BLAS、CLAPACK 的,用过的人知道是干啥的」· C语言 代码 · 共 2,991 行 · 第 1/5 页

C
2,991
字号
	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30   #if KB > 4		lfd	rA3, KB3*8+32(pA0)   #endif   #if KB > 4		lfd	rB1, KB*8+32(pB0)   #endif   #if KB > 4		lfd	rB2, KB2*8+32(pB0)   #endif   #if KB > 4		lfd	rB3, KB3*8+32(pB0)   #endif	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	fmadd	rC03, ra0, rb3, rC03	fmadd	rC13, ra1, rb3, rC13	fmadd	rC23, ra2, rb3, rC23	fmadd	rC33, ra3, rb3, rC33#endif  /* end K=4 block */#if KB > 4   #if KB > 5		lfd	rb0, 40(pB0)   #endif   #if KB > 5		lfd	ra0, 40(pA0)   #endif   #if KB > 5		lfd	ra1, KB*8+40(pA0)   #endif   #if KB > 5		lfd	ra2, KB2*8+40(pA0)   #endif	fmadd	rC00, rA0, rB0, rC00	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30   #if KB > 5		lfd	ra3, KB3*8+40(pA0)   #endif   #if KB > 5		lfd	rb1, KB*8+40(pB0)   #endif   #if KB > 5		lfd	rb2, KB2*8+40(pB0)   #endif   #if KB > 5		lfd	rb3, KB3*8+40(pB0)   #endif	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	fmadd	rC03, rA0, rB3, rC03	fmadd	rC13, rA1, rB3, rC13	fmadd	rC23, rA2, rB3, rC23	fmadd	rC33, rA3, rB3, rC33#endif  /* end K=5 block */#if KB > 5   #if KB > 6		lfd	rB0, 48(pB0)   #endif   #if KB > 6		lfd	rA0, 48(pA0)   #endif   #if KB > 6		lfd	rA1, KB*8+48(pA0)   #endif   #if KB > 6		lfd	rA2, KB2*8+48(pA0)   #endif	fmadd	rC00, ra0, rb0, rC00	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30   #if KB > 6		lfd	rA3, KB3*8+48(pA0)   #endif   #if KB > 6		lfd	rB1, KB*8+48(pB0)   #endif   #if KB > 6		lfd	rB2, KB2*8+48(pB0)   #endif   #if KB > 6		lfd	rB3, KB3*8+48(pB0)   #endif	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	fmadd	
rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	fmadd	rC03, ra0, rb3, rC03	fmadd	rC13, ra1, rb3, rC13	fmadd	rC23, ra2, rb3, rC23	fmadd	rC33, ra3, rb3, rC33#endif  /* end K=6 block */#if KB > 6   #if KB > 7		lfd	rb0, 56(pB0)   #endif   #if KB > 7		lfd	ra0, 56(pA0)   #endif   #if KB > 7		lfd	ra1, KB*8+56(pA0)   #endif   #if KB > 7		lfd	ra2, KB2*8+56(pA0)   #endif	fmadd	rC00, rA0, rB0, rC00	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30   #if KB > 7		lfd	ra3, KB3*8+56(pA0)   #endif   #if KB > 7		lfd	rb1, KB*8+56(pB0)   #endif   #if KB > 7		lfd	rb2, KB2*8+56(pB0)   #endif   #if KB > 7		lfd	rb3, KB3*8+56(pB0)   #endif	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	fmadd	rC03, rA0, rB3, rC03	fmadd	rC13, rA1, rB3, rC13	fmadd	rC23, rA2, rB3, rC23	fmadd	rC33, rA3, rB3, rC33#endif  /* end K=7 block */#if KB > 7   #if KB > 8		lfd	rB0, 64(pB0)   #endif   #if KB > 8		lfd	rA0, 64(pA0)   #endif   #if KB > 8		lfd	rA1, KB*8+64(pA0)   #endif   #if KB > 8		lfd	rA2, KB2*8+64(pA0)   #endif	fmadd	rC00, ra0, rb0, rC00	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30   #if KB > 8		lfd	rA3, KB3*8+64(pA0)   #endif   #if KB > 8		lfd	rB1, KB*8+64(pB0)   #endif   #if KB > 8		lfd	rB2, KB2*8+64(pB0)   #endif   #if KB > 8		lfd	rB3, KB3*8+64(pB0)   #endif	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	fmadd	rC03, ra0, rb3, rC03	fmadd	rC13, ra1, rb3, rC13	fmadd	rC23, ra2, rb3, rC23	fmadd	rC33, ra3, rb3, rC33#endif  /* end K=8 block */#if KB > 8   #if KB > 9		lfd	rb0, 72(pB0)   #endif   #if KB > 9		lfd	ra0, 72(pA0)   #endif   #if KB > 9		lfd	ra1, 
KB*8+72(pA0)   #endif   #if KB > 9		lfd	ra2, KB2*8+72(pA0)   #endif	fmadd	rC00, rA0, rB0, rC00	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30   #if KB > 9		lfd	ra3, KB3*8+72(pA0)   #endif   #if KB > 9		lfd	rb1, KB*8+72(pB0)   #endif   #if KB > 9		lfd	rb2, KB2*8+72(pB0)   #endif   #if KB > 9		lfd	rb3, KB3*8+72(pB0)   #endif	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	fmadd	rC03, rA0, rB3, rC03	fmadd	rC13, rA1, rB3, rC13	fmadd	rC23, rA2, rB3, rC23	fmadd	rC33, rA3, rB3, rC33#endif  /* end K=9 block */#if KB > 9   #if KB > 10		lfd	rB0, 80(pB0)   #endif   #if KB > 10		lfd	rA0, 80(pA0)   #endif   #if KB > 10		lfd	rA1, KB*8+80(pA0)   #endif   #if KB > 10		lfd	rA2, KB2*8+80(pA0)   #endif	fmadd	rC00, ra0, rb0, rC00	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30   #if KB > 10		lfd	rA3, KB3*8+80(pA0)   #endif   #if KB > 10		lfd	rB1, KB*8+80(pB0)   #endif   #if KB > 10		lfd	rB2, KB2*8+80(pB0)   #endif   #if KB > 10		lfd	rB3, KB3*8+80(pB0)   #endif	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	fmadd	rC03, ra0, rb3, rC03	fmadd	rC13, ra1, rb3, rC13	fmadd	rC23, ra2, rb3, rC23	fmadd	rC33, ra3, rb3, rC33#endif  /* end K=10 block */#if KB > 10   #if KB > 11		lfd	rb0, 88(pB0)   #endif   #if KB > 11		lfd	ra0, 88(pA0)   #endif   #if KB > 11		lfd	ra1, KB*8+88(pA0)   #endif   #if KB > 11		lfd	ra2, KB2*8+88(pA0)   #endif	fmadd	rC00, rA0, rB0, rC00	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30   #if KB > 11		lfd	ra3, KB3*8+88(pA0)   #endif   #if KB > 11		lfd	rb1, KB*8+88(pB0)   #endif   #if KB > 11		lfd	rb2, KB2*8+88(pB0)   #endif   #if KB > 11		lfd	rb3, 
KB3*8+88(pB0)   #endif	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	fmadd	rC03, rA0, rB3, rC03	fmadd	rC13, rA1, rB3, rC13	fmadd	rC23, rA2, rB3, rC23	fmadd	rC33, rA3, rB3, rC33#endif  /* end K=11 block */#if KB > 11   #if KB > 12		lfd	rB0, 96(pB0)   #endif   #if KB > 12		lfd	rA0, 96(pA0)   #endif   #if KB > 12		lfd	rA1, KB*8+96(pA0)   #endif   #if KB > 12		lfd	rA2, KB2*8+96(pA0)   #endif	fmadd	rC00, ra0, rb0, rC00	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30   #if KB > 12		lfd	rA3, KB3*8+96(pA0)   #endif   #if KB > 12		lfd	rB1, KB*8+96(pB0)   #endif   #if KB > 12		lfd	rB2, KB2*8+96(pB0)   #endif   #if KB > 12		lfd	rB3, KB3*8+96(pB0)   #endif	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	fmadd	rC03, ra0, rb3, rC03	fmadd	rC13, ra1, rb3, rC13	fmadd	rC23, ra2, rb3, rC23	fmadd	rC33, ra3, rb3, rC33#endif  /* end K=12 block */#if KB > 12   #if KB > 13		lfd	rb0, 104(pB0)   #endif   #if KB > 13		lfd	ra0, 104(pA0)   #endif   #if KB > 13		lfd	ra1, KB*8+104(pA0)   #endif   #if KB > 13		lfd	ra2, KB2*8+104(pA0)   #endif	fmadd	rC00, rA0, rB0, rC00	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30   #if KB > 13		lfd	ra3, KB3*8+104(pA0)   #endif   #if KB > 13		lfd	rb1, KB*8+104(pB0)   #endif   #if KB > 13		lfd	rb2, KB2*8+104(pB0)   #endif   #if KB > 13		lfd	rb3, KB3*8+104(pB0)   #endif	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	fmadd	rC03, rA0, rB3, rC03	fmadd	rC13, rA1, rB3, rC13	fmadd	rC23, rA2, rB3, rC23	fmadd	
rC33, rA3, rB3, rC33#endif  /* end K=13 block */#if KB > 13   #if KB > 14		lfd	rB0, 112(pB0)   #endif   #if KB > 14		lfd	rA0, 112(pA0)   #endif   #if KB > 14		lfd	rA1, KB*8+112(pA0)   #endif   #if KB > 14		lfd	rA2, KB2*8+112(pA0)   #endif	fmadd	rC00, ra0, rb0, rC00	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30   #if KB > 14		lfd	rA3, KB3*8+112(pA0)   #endif   #if KB > 14		lfd	rB1, KB*8+112(pB0)   #endif   #if KB > 14		lfd	rB2, KB2*8+112(pB0)   #endif   #if KB > 14		lfd	rB3, KB3*8+112(pB0)   #endif	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	fmadd	rC03, ra0, rb3, rC03	fmadd	rC13, ra1, rb3, rC13	fmadd	rC23, ra2, rb3, rC23	fmadd	rC33, ra3, rb3, rC33#endif  /* end K=14 block */#if KB > 14   #if KB > 15		lfd	rb0, 120(pB0)   #endif   #if KB > 15		lfd	ra0, 120(pA0)   #endif   #if KB > 15		lfd	ra1, KB*8+120(pA0)   #endif   #if KB > 15		lfd	ra2, KB2*8+120(pA0)   #endif	fmadd	rC00, rA0, rB0, rC00	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30   #if KB > 15		lfd	ra3, KB3*8+120(pA0)   #endif   #if KB > 15		lfd	rb1, KB*8+120(pB0)   #endif   #if KB > 15		lfd	rb2, KB2*8+120(pB0)   #endif   #if KB > 15		lfd	rb3, KB3*8+120(pB0)   #endif	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	fmadd	rC03, rA0, rB3, rC03	fmadd	rC13, rA1, rB3, rC13	fmadd	rC23, rA2, rB3, rC23	fmadd	rC33, rA3, rB3, rC33#endif  /* end K=15 block */#if KB > 15   #if KB > 16		lfd	rB0, 128(pB0)   #endif   #if KB > 16		lfd	rA0, 128(pA0)   #endif   #if KB > 16		lfd	rA1, KB*8+128(pA0)   #endif   #if KB > 16		lfd	rA2, KB2*8+128(pA0)   #endif	fmadd	rC00, ra0, rb0, rC00	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, 
rC20	fmadd	rC30, ra3, rb0, rC30   #if KB > 16		lfd	rA3, KB3*8+128(pA0)   #endif   #if KB > 16		lfd	rB1, KB*8+128(pB0)   #endif   #if KB > 16		lfd	rB2, KB2*8+128(pB0)   #endif   #if KB > 16		lfd	rB3, KB3*8+128(pB0)   #endif	fmadd	rC01, ra0, rb1, rC01	fmadd	rC11, ra1, rb1, rC11	fmadd	rC21, ra2, rb1, rC21	fmadd	rC31, ra3, rb1, rC31	fmadd	rC02, ra0, rb2, rC02	fmadd	rC12, ra1, rb2, rC12	fmadd	rC22, ra2, rb2, rC22	fmadd	rC32, ra3, rb2, rC32	fmadd	rC03, ra0, rb3, rC03	fmadd	rC13, ra1, rb3, rC13	fmadd	rC23, ra2, rb3, rC23	fmadd	rC33, ra3, rb3, rC33#endif  /* end K=16 block */#if KB > 16   #if KB > 17		lfd	rb0, 136(pB0)   #endif   #if KB > 17		lfd	ra0, 136(pA0)   #endif   #if KB > 17		lfd	ra1, KB*8+136(pA0)   #endif   #if KB > 17		lfd	ra2, KB2*8+136(pA0)   #endif	fmadd	rC00, rA0, rB0, rC00	fmadd	rC10, rA1, rB0, rC10	fmadd	rC20, rA2, rB0, rC20	fmadd	rC30, rA3, rB0, rC30   #if KB > 17		lfd	ra3, KB3*8+136(pA0)   #endif   #if KB > 17		lfd	rb1, KB*8+136(pB0)   #endif   #if KB > 17		lfd	rb2, KB2*8+136(pB0)   #endif   #if KB > 17		lfd	rb3, KB3*8+136(pB0)   #endif	fmadd	rC01, rA0, rB1, rC01	fmadd	rC11, rA1, rB1, rC11	fmadd	rC21, rA2, rB1, rC21	fmadd	rC31, rA3, rB1, rC31	fmadd	rC02, rA0, rB2, rC02	fmadd	rC12, rA1, rB2, rC12	fmadd	rC22, rA2, rB2, rC22	fmadd	rC32, rA3, rB2, rC32	fmadd	rC03, rA0, rB3, rC03	fmadd	rC13, rA1, rB3, rC13	fmadd	rC23, rA2, rB3, rC23	fmadd	rC33, rA3, rB3, rC33#endif  /* end K=17 block */#if KB > 17   #if KB > 18		lfd	rB0, 144(pB0)   #endif   #if KB > 18		lfd	rA0, 144(pA0)   #endif   #if KB > 18		lfd	rA1, KB*8+144(pA0)   #endif   #if KB > 18		lfd	rA2, KB2*8+144(pA0)   #endif	fmadd	rC00, ra0, rb0, rC00	fmadd	rC10, ra1, rb0, rC10	fmadd	rC20, ra2, rb0, rC20	fmadd	rC30, ra3, rb0, rC30   #if KB > 18		lfd	rA3, KB3*8+144(pA0)   #endif   #if KB > 18		lfd	rB1, KB*8+144(pB0)   #endif   #if KB > 18		lfd	rB2, KB2*8+144(pB0)   #endif

⌨️ 快捷键说明

复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?