atl_smm4x4x16_hppa.c

来自「基于 BLAS / CLAPACK 的代码，用过的人知道是干啥的」· C语言 代码 · 共 1,134 行 · 第 1/2 页

C
1,134
字号
	fmpyfadd,sgl	rA3a, rB0a, rC30, rC30						fldd   	56(pA0), ra0	fmpyfadd,sgl	rA0a, rB1a, rC01, rC01	fmpyfadd,sgl	rA1a, rB1a, rC11, rC11	fmpyfadd,sgl	rA2a, rB1a, rC21, rC21	fmpyfadd,sgl	rA3a, rB1a, rC31, rC31						fldd   	56(pA1), ra1	fmpyfadd,sgl	rA0a, rB2a, rC02, rC02	fmpyfadd,sgl	rA1a, rB2a, rC12, rC12	fmpyfadd,sgl	rA2a, rB2a, rC22, rC22	fmpyfadd,sgl	rA3a, rB2a, rC32, rC32						fldd   	56(pA2), ra2	fmpyfadd,sgl	rA0a, rB3a, rC03, rC03	fmpyfadd,sgl	rA1a, rB3a, rC13, rC13	fmpyfadd,sgl	rA2a, rB3a, rC23, rC23	fmpyfadd,sgl	rA3a, rB3a, rC33, rC33						fldd   	56(pA3), ra3	fmpyfadd,sgl	rA0b, rB0b, rC00, rC00	fmpyfadd,sgl	rA1b, rB0b, rC10, rC10	fmpyfadd,sgl	rA2b, rB0b, rC20, rC20	fmpyfadd,sgl	rA3b, rB0b, rC30, rC30						fldd   	56(pB0), rB0	fmpyfadd,sgl	rA0b, rB1b, rC01, rC01	fmpyfadd,sgl	rA1b, rB1b, rC11, rC11	fmpyfadd,sgl	rA2b, rB1b, rC21, rC21	fmpyfadd,sgl	rA3b, rB1b, rC31, rC31						fldd   	56(pB1), rB1	fmpyfadd,sgl	rA0b, rB2b, rC02, rC02	fmpyfadd,sgl	rA1b, rB2b, rC12, rC12	fmpyfadd,sgl	rA2b, rB2b, rC22, rC22	fmpyfadd,sgl	rA3b, rB2b, rC32, rC32						fldd   	56(pB2), rB2	fmpyfadd,sgl	rA0b, rB3b, rC03, rC03	fmpyfadd,sgl	rA1b, rB3b, rC13, rC13	fmpyfadd,sgl	rA2b, rB3b, rC23, rC23	fmpyfadd,sgl	rA3b, rB3b, rC33, rC33						fldd   	56(pB3), rB3	fmpyfadd,sgl	ra0a, rB0a, rC00, rC00	fmpyfadd,sgl	ra1a, rB0a, rC10, rC10	fmpyfadd,sgl	ra2a, rB0a, rC20, rC20	fmpyfadd,sgl	ra3a, rB0a, rC30, rC30						fldd,mb 64(pA0), rA0	fmpyfadd,sgl	ra0a, rB1a, rC01, rC01	fmpyfadd,sgl	ra1a, rB1a, rC11, rC11	fmpyfadd,sgl	ra2a, rB1a, rC21, rC21	fmpyfadd,sgl	ra3a, rB1a, rC31, rC31						fldd,mb 64(pA1), rA1	fmpyfadd,sgl	ra0a, rB2a, rC02, rC02	fmpyfadd,sgl	ra1a, rB2a, rC12, rC12	fmpyfadd,sgl	ra2a, rB2a, rC22, rC22	fmpyfadd,sgl	ra3a, rB2a, rC32, rC32						fldd,mb 64(pA2), rA2	fmpyfadd,sgl	ra0a, rB3a, rC03, rC03	fmpyfadd,sgl	ra1a, rB3a, rC13, rC13	fmpyfadd,sgl	ra2a, rB3a, rC23, rC23	fmpyfadd,sgl	ra3a, rB3a, rC33, rC33						fldd,mb 64(pA3), rA3	fmpyfadd,sgl	ra0b, rB0b, rC00, rC00	fmpyfadd,sgl	ra1b, rB0b, rC10, 
rC10	fmpyfadd,sgl	ra2b, rB0b, rC20, rC20	fmpyfadd,sgl	ra3b, rB0b, rC30, rC30						fldd,mb	64(pB0), rB0	fmpyfadd,sgl	ra0b, rB1b, rC01, rC01	fmpyfadd,sgl	ra1b, rB1b, rC11, rC11	fmpyfadd,sgl	ra2b, rB1b, rC21, rC21	fmpyfadd,sgl	ra3b, rB1b, rC31, rC31						fldd,mb   64(pB1), rB1	fmpyfadd,sgl	ra0b, rB2b, rC02, rC02	fmpyfadd,sgl	ra1b, rB2b, rC12, rC12	fmpyfadd,sgl	ra2b, rB2b, rC22, rC22	fmpyfadd,sgl	ra3b, rB2b, rC32, rC32						fldd,mb   64(pB2), rB2	fmpyfadd,sgl	ra0b, rB3b, rC03, rC03	fmpyfadd,sgl	ra1b, rB3b, rC13, rC13	fmpyfadd,sgl	ra2b, rB3b, rC23, rC23	fmpyfadd,sgl	ra3b, rB3b, rC33, rC33;;       while (--k);;	addib,<>	-16, rK, KLOOP						fldd,mb 64(pB3), rB3;	nop#endif;;       Drain pipe;	fmpyfadd,sgl	rA0a, rB0a, rC00, rC00	fmpyfadd,sgl	rA1a, rB0a, rC10, rC10	fmpyfadd,sgl	rA2a, rB0a, rC20, rC20	fmpyfadd,sgl	rA3a, rB0a, rC30, rC30						fldd   	8(pA0), ra0	fmpyfadd,sgl	rA0a, rB1a, rC01, rC01	fmpyfadd,sgl	rA1a, rB1a, rC11, rC11	fmpyfadd,sgl	rA2a, rB1a, rC21, rC21	fmpyfadd,sgl	rA3a, rB1a, rC31, rC31						fldd   	8(pA1), ra1	fmpyfadd,sgl	rA0a, rB2a, rC02, rC02	fmpyfadd,sgl	rA1a, rB2a, rC12, rC12	fmpyfadd,sgl	rA2a, rB2a, rC22, rC22	fmpyfadd,sgl	rA3a, rB2a, rC32, rC32						fldd   	8(pA2), ra2	fmpyfadd,sgl	rA0a, rB3a, rC03, rC03	fmpyfadd,sgl	rA1a, rB3a, rC13, rC13	fmpyfadd,sgl	rA2a, rB3a, rC23, rC23	fmpyfadd,sgl	rA3a, rB3a, rC33, rC33						fldd   	8(pA3), ra3	fmpyfadd,sgl	rA0b, rB0b, rC00, rC00	fmpyfadd,sgl	rA1b, rB0b, rC10, rC10	fmpyfadd,sgl	rA2b, rB0b, rC20, rC20	fmpyfadd,sgl	rA3b, rB0b, rC30, rC30						fldd   	8(pB0), rB0	fmpyfadd,sgl	rA0b, rB1b, rC01, rC01	fmpyfadd,sgl	rA1b, rB1b, rC11, rC11	fmpyfadd,sgl	rA2b, rB1b, rC21, rC21	fmpyfadd,sgl	rA3b, rB1b, rC31, rC31						fldd   	8(pB1), rB1	fmpyfadd,sgl	rA0b, rB2b, rC02, rC02	fmpyfadd,sgl	rA1b, rB2b, rC12, rC12	fmpyfadd,sgl	rA2b, rB2b, rC22, rC22	fmpyfadd,sgl	rA3b, rB2b, rC32, rC32						fldd   	8(pB2), rB2	fmpyfadd,sgl	rA0b, rB3b, rC03, rC03	fmpyfadd,sgl	rA1b, rB3b, rC13, rC13	fmpyfadd,sgl	rA2b, rB3b, rC23, rC23	fmpyfadd,sgl	
rA3b, rB3b, rC33, rC33						fldd   	8(pB3), rB3	fmpyfadd,sgl	ra0a, rB0a, rC00, rC00	fmpyfadd,sgl	ra1a, rB0a, rC10, rC10	fmpyfadd,sgl	ra2a, rB0a, rC20, rC20	fmpyfadd,sgl	ra3a, rB0a, rC30, rC30						fldd    16(pA0), rA0	fmpyfadd,sgl	ra0a, rB1a, rC01, rC01	fmpyfadd,sgl	ra1a, rB1a, rC11, rC11	fmpyfadd,sgl	ra2a, rB1a, rC21, rC21	fmpyfadd,sgl	ra3a, rB1a, rC31, rC31						fldd    16(pA1), rA1	fmpyfadd,sgl	ra0a, rB2a, rC02, rC02	fmpyfadd,sgl	ra1a, rB2a, rC12, rC12	fmpyfadd,sgl	ra2a, rB2a, rC22, rC22	fmpyfadd,sgl	ra3a, rB2a, rC32, rC32						fldd    16(pA2), rA2	fmpyfadd,sgl	ra0a, rB3a, rC03, rC03	fmpyfadd,sgl	ra1a, rB3a, rC13, rC13	fmpyfadd,sgl	ra2a, rB3a, rC23, rC23	fmpyfadd,sgl	ra3a, rB3a, rC33, rC33						fldd    16(pA3), rA3	fmpyfadd,sgl	ra0b, rB0b, rC00, rC00	fmpyfadd,sgl	ra1b, rB0b, rC10, rC10	fmpyfadd,sgl	ra2b, rB0b, rC20, rC20	fmpyfadd,sgl	ra3b, rB0b, rC30, rC30						fldd   	16(pB0), rB0	fmpyfadd,sgl	ra0b, rB1b, rC01, rC01	fmpyfadd,sgl	ra1b, rB1b, rC11, rC11	fmpyfadd,sgl	ra2b, rB1b, rC21, rC21	fmpyfadd,sgl	ra3b, rB1b, rC31, rC31						fldd      16(pB1), rB1	fmpyfadd,sgl	ra0b, rB2b, rC02, rC02	fmpyfadd,sgl	ra1b, rB2b, rC12, rC12	fmpyfadd,sgl	ra2b, rB2b, rC22, rC22	fmpyfadd,sgl	ra3b, rB2b, rC32, rC32						fldd      16(pB2), rB2	fmpyfadd,sgl	ra0b, rB3b, rC03, rC03	fmpyfadd,sgl	ra1b, rB3b, rC13, rC13	fmpyfadd,sgl	ra2b, rB3b, rC23, rC23	fmpyfadd,sgl	ra3b, rB3b, rC33, rC33						fldd    16(pB3), rB3	fmpyfadd,sgl	rA0a, rB0a, rC00, rC00	fmpyfadd,sgl	rA1a, rB0a, rC10, rC10	fmpyfadd,sgl	rA2a, rB0a, rC20, rC20	fmpyfadd,sgl	rA3a, rB0a, rC30, rC30						fldd   	24(pA0), ra0	fmpyfadd,sgl	rA0a, rB1a, rC01, rC01	fmpyfadd,sgl	rA1a, rB1a, rC11, rC11	fmpyfadd,sgl	rA2a, rB1a, rC21, rC21	fmpyfadd,sgl	rA3a, rB1a, rC31, rC31						fldd   	24(pA1), ra1	fmpyfadd,sgl	rA0a, rB2a, rC02, rC02	fmpyfadd,sgl	rA1a, rB2a, rC12, rC12	fmpyfadd,sgl	rA2a, rB2a, rC22, rC22	fmpyfadd,sgl	rA3a, rB2a, rC32, rC32						fldd   	24(pA2), ra2	fmpyfadd,sgl	rA0a, rB3a, rC03, rC03	fmpyfadd,sgl	rA1a, rB3a, rC13, rC13	
fmpyfadd,sgl	rA2a, rB3a, rC23, rC23	fmpyfadd,sgl	rA3a, rB3a, rC33, rC33						fldd   	24(pA3), ra3	fmpyfadd,sgl	rA0b, rB0b, rC00, rC00	fmpyfadd,sgl	rA1b, rB0b, rC10, rC10	fmpyfadd,sgl	rA2b, rB0b, rC20, rC20	fmpyfadd,sgl	rA3b, rB0b, rC30, rC30						fldd   	24(pB0), rB0	fmpyfadd,sgl	rA0b, rB1b, rC01, rC01	fmpyfadd,sgl	rA1b, rB1b, rC11, rC11	fmpyfadd,sgl	rA2b, rB1b, rC21, rC21	fmpyfadd,sgl	rA3b, rB1b, rC31, rC31						fldd   	24(pB1), rB1	fmpyfadd,sgl	rA0b, rB2b, rC02, rC02	fmpyfadd,sgl	rA1b, rB2b, rC12, rC12	fmpyfadd,sgl	rA2b, rB2b, rC22, rC22	fmpyfadd,sgl	rA3b, rB2b, rC32, rC32						fldd   	24(pB2), rB2	fmpyfadd,sgl	rA0b, rB3b, rC03, rC03	fmpyfadd,sgl	rA1b, rB3b, rC13, rC13	fmpyfadd,sgl	rA2b, rB3b, rC23, rC23	fmpyfadd,sgl	rA3b, rB3b, rC33, rC33						fldd   	24(pB3), rB3	fmpyfadd,sgl	ra0a, rB0a, rC00, rC00	fmpyfadd,sgl	ra1a, rB0a, rC10, rC10	fmpyfadd,sgl	ra2a, rB0a, rC20, rC20	fmpyfadd,sgl	ra3a, rB0a, rC30, rC30						fldd    32(pA0), rA0	fmpyfadd,sgl	ra0a, rB1a, rC01, rC01	fmpyfadd,sgl	ra1a, rB1a, rC11, rC11	fmpyfadd,sgl	ra2a, rB1a, rC21, rC21	fmpyfadd,sgl	ra3a, rB1a, rC31, rC31						fldd    32(pA1), rA1	fmpyfadd,sgl	ra0a, rB2a, rC02, rC02	fmpyfadd,sgl	ra1a, rB2a, rC12, rC12	fmpyfadd,sgl	ra2a, rB2a, rC22, rC22	fmpyfadd,sgl	ra3a, rB2a, rC32, rC32						fldd    32(pA2), rA2	fmpyfadd,sgl	ra0a, rB3a, rC03, rC03	fmpyfadd,sgl	ra1a, rB3a, rC13, rC13	fmpyfadd,sgl	ra2a, rB3a, rC23, rC23	fmpyfadd,sgl	ra3a, rB3a, rC33, rC33						fldd    32(pA3), rA3	fmpyfadd,sgl	ra0b, rB0b, rC00, rC00	fmpyfadd,sgl	ra1b, rB0b, rC10, rC10	fmpyfadd,sgl	ra2b, rB0b, rC20, rC20	fmpyfadd,sgl	ra3b, rB0b, rC30, rC30						fldd   	32(pB0), rB0	fmpyfadd,sgl	ra0b, rB1b, rC01, rC01	fmpyfadd,sgl	ra1b, rB1b, rC11, rC11	fmpyfadd,sgl	ra2b, rB1b, rC21, rC21	fmpyfadd,sgl	ra3b, rB1b, rC31, rC31						fldd      32(pB1), rB1	fmpyfadd,sgl	ra0b, rB2b, rC02, rC02	fmpyfadd,sgl	ra1b, rB2b, rC12, rC12	fmpyfadd,sgl	ra2b, rB2b, rC22, rC22	fmpyfadd,sgl	ra3b, rB2b, rC32, rC32						fldd      32(pB2), rB2	fmpyfadd,sgl	ra0b, rB3b, 
rC03, rC03	fmpyfadd,sgl	ra1b, rB3b, rC13, rC13	fmpyfadd,sgl	ra2b, rB3b, rC23, rC23	fmpyfadd,sgl	ra3b, rB3b, rC33, rC33						fldd    32(pB3), rB3	fmpyfadd,sgl	rA0a, rB0a, rC00, rC00	fmpyfadd,sgl	rA1a, rB0a, rC10, rC10	fmpyfadd,sgl	rA2a, rB0a, rC20, rC20	fmpyfadd,sgl	rA3a, rB0a, rC30, rC30						fldd   	40(pA0), ra0	fmpyfadd,sgl	rA0a, rB1a, rC01, rC01	fmpyfadd,sgl	rA1a, rB1a, rC11, rC11	fmpyfadd,sgl	rA2a, rB1a, rC21, rC21	fmpyfadd,sgl	rA3a, rB1a, rC31, rC31						fldd   	40(pA1), ra1	fmpyfadd,sgl	rA0a, rB2a, rC02, rC02	fmpyfadd,sgl	rA1a, rB2a, rC12, rC12	fmpyfadd,sgl	rA2a, rB2a, rC22, rC22	fmpyfadd,sgl	rA3a, rB2a, rC32, rC32						fldd   	40(pA2), ra2	fmpyfadd,sgl	rA0a, rB3a, rC03, rC03	fmpyfadd,sgl	rA1a, rB3a, rC13, rC13	fmpyfadd,sgl	rA2a, rB3a, rC23, rC23	fmpyfadd,sgl	rA3a, rB3a, rC33, rC33						fldd   	40(pA3), ra3	fmpyfadd,sgl	rA0b, rB0b, rC00, rC00	fmpyfadd,sgl	rA1b, rB0b, rC10, rC10	fmpyfadd,sgl	rA2b, rB0b, rC20, rC20	fmpyfadd,sgl	rA3b, rB0b, rC30, rC30						fldd   	40(pB0), rB0	fmpyfadd,sgl	rA0b, rB1b, rC01, rC01	fmpyfadd,sgl	rA1b, rB1b, rC11, rC11	fmpyfadd,sgl	rA2b, rB1b, rC21, rC21	fmpyfadd,sgl	rA3b, rB1b, rC31, rC31						fldd   	40(pB1), rB1	fmpyfadd,sgl	rA0b, rB2b, rC02, rC02	fmpyfadd,sgl	rA1b, rB2b, rC12, rC12	fmpyfadd,sgl	rA2b, rB2b, rC22, rC22	fmpyfadd,sgl	rA3b, rB2b, rC32, rC32						fldd   	40(pB2), rB2	fmpyfadd,sgl	rA0b, rB3b, rC03, rC03	fmpyfadd,sgl	rA1b, rB3b, rC13, rC13	fmpyfadd,sgl	rA2b, rB3b, rC23, rC23	fmpyfadd,sgl	rA3b, rB3b, rC33, rC33						fldd   	40(pB3), rB3	fmpyfadd,sgl	ra0a, rB0a, rC00, rC00	fmpyfadd,sgl	ra1a, rB0a, rC10, rC10	fmpyfadd,sgl	ra2a, rB0a, rC20, rC20	fmpyfadd,sgl	ra3a, rB0a, rC30, rC30						fldd    48(pA0), rA0	fmpyfadd,sgl	ra0a, rB1a, rC01, rC01	fmpyfadd,sgl	ra1a, rB1a, rC11, rC11	fmpyfadd,sgl	ra2a, rB1a, rC21, rC21	fmpyfadd,sgl	ra3a, rB1a, rC31, rC31						fldd    48(pA1), rA1	fmpyfadd,sgl	ra0a, rB2a, rC02, rC02	fmpyfadd,sgl	ra1a, rB2a, rC12, rC12	fmpyfadd,sgl	ra2a, rB2a, rC22, rC22	fmpyfadd,sgl	ra3a, rB2a, rC32, rC32						fldd  
  48(pA2), rA2	fmpyfadd,sgl	ra0a, rB3a, rC03, rC03	fmpyfadd,sgl	ra1a, rB3a, rC13, rC13	fmpyfadd,sgl	ra2a, rB3a, rC23, rC23	fmpyfadd,sgl	ra3a, rB3a, rC33, rC33						fldd    48(pA3), rA3	fmpyfadd,sgl	ra0b, rB0b, rC00, rC00	fmpyfadd,sgl	ra1b, rB0b, rC10, rC10	fmpyfadd,sgl	ra2b, rB0b, rC20, rC20	fmpyfadd,sgl	ra3b, rB0b, rC30, rC30						fldd   	48(pB0), rB0	fmpyfadd,sgl	ra0b, rB1b, rC01, rC01	fmpyfadd,sgl	ra1b, rB1b, rC11, rC11	fmpyfadd,sgl	ra2b, rB1b, rC21, rC21	fmpyfadd,sgl	ra3b, rB1b, rC31, rC31						fldd      48(pB1), rB1	fmpyfadd,sgl	ra0b, rB2b, rC02, rC02	fmpyfadd,sgl	ra1b, rB2b, rC12, rC12	fmpyfadd,sgl	ra2b, rB2b, rC22, rC22	fmpyfadd,sgl	ra3b, rB2b, rC32, rC32						fldd      48(pB2), rB2	fmpyfadd,sgl	ra0b, rB3b, rC03, rC03	fmpyfadd,sgl	ra1b, rB3b, rC13, rC13	fmpyfadd,sgl	ra2b, rB3b, rC23, rC23	fmpyfadd,sgl	ra3b, rB3b, rC33, rC33						fldd    48(pB3), rB3	fmpyfadd,sgl	rA0a, rB0a, rC00, rC00	fmpyfadd,sgl	rA1a, rB0a, rC10, rC10	fmpyfadd,sgl	rA2a, rB0a, rC20, rC20	fmpyfadd,sgl	rA3a, rB0a, rC30, rC30						fldd   	56(pA0), ra0	fmpyfadd,sgl	rA0a, rB1a, rC01, rC01	fmpyfadd,sgl	rA1a, rB1a, rC11, rC11	fmpyfadd,sgl	rA2a, rB1a, rC21, rC21	fmpyfadd,sgl	rA3a, rB1a, rC31, rC31						fldd   	56(pA1), ra1	fmpyfadd,sgl	rA0a, rB2a, rC02, rC02	fmpyfadd,sgl	rA1a, rB2a, rC12, rC12	fmpyfadd,sgl	rA2a, rB2a, rC22, rC22	fmpyfadd,sgl	rA3a, rB2a, rC32, rC32						fldd   	56(pA2), ra2	fmpyfadd,sgl	rA0a, rB3a, rC03, rC03	fmpyfadd,sgl	rA1a, rB3a, rC13, rC13	fmpyfadd,sgl	rA2a, rB3a, rC23, rC23	fmpyfadd,sgl	rA3a, rB3a, rC33, rC33						fldd   	56(pA3), ra3	fmpyfadd,sgl	rA0b, rB0b, rC00, rC00	fmpyfadd,sgl	rA1b, rB0b, rC10, rC10	fmpyfadd,sgl	rA2b, rB0b, rC20, rC20	fmpyfadd,sgl	rA3b, rB0b, rC30, rC30						fldd   	56(pB0), rB0	fmpyfadd,sgl	rA0b, rB1b, rC01, rC01	fmpyfadd,sgl	rA1b, rB1b, rC11, rC11	fmpyfadd,sgl	rA2b, rB1b, rC21, rC21	fmpyfadd,sgl	rA3b, rB1b, rC31, rC31						fldd   	56(pB1), rB1	fmpyfadd,sgl	rA0b, rB2b, rC02, rC02	fmpyfadd,sgl	rA1b, rB2b, rC12, rC12	fmpyfadd,sgl	rA2b, rB2b, rC22, rC22	
fmpyfadd,sgl	rA3b, rB2b, rC32, rC32						fldd   	56(pB2), rB2	fmpyfadd,sgl	rA0b, rB3b, rC03, rC03	fmpyfadd,sgl	rA1b, rB3b, rC13, rC13	fmpyfadd,sgl	rA2b, rB3b, rC23, rC23	fmpyfadd,sgl	rA3b, rB3b, rC33, rC33						fldd   	56(pB3), rB3	fmpyfadd,sgl	ra0a, rB0a, rC00, rC00	fmpyfadd,sgl	ra1a, rB0a, rC10, rC10						ldo	4*3*KB+64(pA0), pA0	fmpyfadd,sgl	ra2a, rB0a, rC20, rC20	fmpyfadd,sgl	ra3a, rB0a, rC30, rC30	fmpyfadd,sgl	ra0a, rB1a, rC01, rC01	fmpyfadd,sgl	ra1a, rB1a, rC11, rC11						ldo	4*3*KB+64(pA1), pA1	fmpyfadd,sgl	ra2a, rB1a, rC21, rC21	fmpyfadd,sgl	ra3a, rB1a, rC31, rC31							ldw,ma	64(pfA), %r0	fmpyfadd,sgl	ra0a, rB2a, rC02, rC02	fmpyfadd,sgl	ra1a, rB2a, rC12, rC12						ldo	4*3*KB+64(pA2), pA2	fmpyfadd,sgl	ra2a, rB2a, rC22, rC22	fmpyfadd,sgl	ra3a, rB2a, rC32, rC32	fmpyfadd,sgl	ra0a, rB3a, rC03, rC03	fmpyfadd,sgl	ra1a, rB3a, rC13, rC13						ldo	4*3*KB+64(pA3), pA3	fmpyfadd,sgl	ra2a, rB3a, rC23, rC23	fmpyfadd,sgl	ra3a, rB3a, rC33, rC33	fmpyfadd,sgl	ra0b, rB0b, rC00, rC00	fmpyfadd,sgl	ra1b, rB0b, rC10, rC10						ldo	-4*(KB-16)(pB0), pB0	fmpyfadd,sgl	ra2b, rB0b, rC20, rC20	fmpyfadd,sgl	ra3b, rB0b, rC30, rC30							ldw,ma	64(pfB), %r0	fmpyfadd,sgl	ra0b, rB1b, rC01, rC01	fmpyfadd,sgl	ra1b, rB1b, rC11, rC11							ldd   	64(pC0), %r0	fmpyfadd,sgl	ra2b, rB1b, rC21, rC21	fmpyfadd,sgl	ra3b, rB1b, rC31, rC31							ldd   	64(pC1), %r0	fmpyfadd,sgl	ra0b, rB2b, rC02, rC02	fmpyfadd,sgl	ra1b, rB2b, rC12, rC12						ldo	-4*(KB-16)(pB1), pB1	fmpyfadd,sgl	ra2b, rB2b, rC22, rC22	fmpyfadd,sgl	ra3b, rB2b, rC32, rC32							ldd   	64(pC2), %r0	fmpyfadd,sgl	ra0b, rB3b, rC03, rC03	fmpyfadd,sgl	ra1b, rB3b, rC13, rC13						ldo	-4*(KB-16)(pB2), pB2	fmpyfadd,sgl	ra2b, rB3b, rC23, rC23	fmpyfadd,sgl	ra3b, rB3b, rC33, rC33						ldo	-4*(KB-16)(pB3), pB3							ldd   	64(pC3), %r0;;       end drain KLOOP;;;       Write to C;	fstw	rC00,  0(pC0)	fstw	rC10,  CMUL(4)(pC0)	fstw	rC20,  CMUL(8)(pC0)	fstw	rC30, CMUL(12)(pC0)					ldo	CMUL(16)(pC0), pC0	fstw	rC01,  0(pC1)	fstw	rC11,  CMUL(4)(pC1)	fstw	rC21,  
CMUL(8)(pC1)	fstw	rC31, CMUL(12)(pC1)					ldo	CMUL(16)(pC1), pC1	fstw	rC02,  0(pC2)	fstw	rC12,  CMUL(4)(pC2)	fstw	rC22,  CMUL(8)(pC2)	fstw	rC32, CMUL(12)(pC2)					ldo 	CMUL(16)(pC2), pC2	fstw	rC03,  0(pC3)	fstw	rC13,  CMUL(4)(pC3)	fstw	rC23,  CMUL(8)(pC3)	fstw	rC33, CMUL(12)(pC3);;       while (M);;;	ldo	-6(rM), rM;	cmpib,<> 0, rM, MLOOP	addib,<>	-4, rM, MLOOP					ldo	CMUL(16)(pC3), pC3	ldo	4*4*KB(pB0), pB0	ldo	4*4*KB(pB1), pB1	ldo	4*4*KB(pB2), pB2	ldo	4*4*KB(pB3), pB3	sub	pA0, incAn, pA0	sub	pA1, incAn, pA1	sub	pA2, incAn, pA2	sub	pA3, incAn, pA3	add	pC0, incCn, pC0	add	pC1, incCn, pC1	add	pC2, incCn, pC2;;       while (N);;	addib,<>	-4, rN, NLOOP	add	pC3, incCn, pC3/* *      Restore regs and return */	fldd	-8(%r30), %fr12	fldd	-16(%r30), %fr13	fldd	-24(%r30), %fr14	fldd	-32(%r30), %fr15	fldd	-40(%r30), %fr16	fldd	-48(%r30), %fr17	fldd	-56(%r30), %fr18	fldd	-64(%r30), %fr19	fldd	-72(%r30), %fr20	fldd	-80(%r30), %fr21	ldw	-84(%r30), %r3	ldw	-88(%r30), %r4	ldw	-92(%r30), %r5	ldw	-96(%r30), %r6	ldw	-100(%r30), %r7	ldw	-104(%r30), %r8	ldw	-108(%r30), %r9	ldw	-112(%r30), %r10	ldw	-116(%r30), %r11	ldw	-120(%r30), %r12	ldw	-124(%r30), %r13	bve (%r2)	ldo	-FSIZE(%r30), %r30	.EXIT	.PROCEND

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?