atl_smm4x4x16_hppa.c
来自「基于 BLAS、CLAPACK 的，用过的人知道是干啥的」· C语言 代码 · 共 1,134 行 · 第 1/2 页
C
1,134 行
fmpyfadd,sgl rA3a, rB0a, rC30, rC30 fldd 56(pA0), ra0 fmpyfadd,sgl rA0a, rB1a, rC01, rC01 fmpyfadd,sgl rA1a, rB1a, rC11, rC11 fmpyfadd,sgl rA2a, rB1a, rC21, rC21 fmpyfadd,sgl rA3a, rB1a, rC31, rC31 fldd 56(pA1), ra1 fmpyfadd,sgl rA0a, rB2a, rC02, rC02 fmpyfadd,sgl rA1a, rB2a, rC12, rC12 fmpyfadd,sgl rA2a, rB2a, rC22, rC22 fmpyfadd,sgl rA3a, rB2a, rC32, rC32 fldd 56(pA2), ra2 fmpyfadd,sgl rA0a, rB3a, rC03, rC03 fmpyfadd,sgl rA1a, rB3a, rC13, rC13 fmpyfadd,sgl rA2a, rB3a, rC23, rC23 fmpyfadd,sgl rA3a, rB3a, rC33, rC33 fldd 56(pA3), ra3 fmpyfadd,sgl rA0b, rB0b, rC00, rC00 fmpyfadd,sgl rA1b, rB0b, rC10, rC10 fmpyfadd,sgl rA2b, rB0b, rC20, rC20 fmpyfadd,sgl rA3b, rB0b, rC30, rC30 fldd 56(pB0), rB0 fmpyfadd,sgl rA0b, rB1b, rC01, rC01 fmpyfadd,sgl rA1b, rB1b, rC11, rC11 fmpyfadd,sgl rA2b, rB1b, rC21, rC21 fmpyfadd,sgl rA3b, rB1b, rC31, rC31 fldd 56(pB1), rB1 fmpyfadd,sgl rA0b, rB2b, rC02, rC02 fmpyfadd,sgl rA1b, rB2b, rC12, rC12 fmpyfadd,sgl rA2b, rB2b, rC22, rC22 fmpyfadd,sgl rA3b, rB2b, rC32, rC32 fldd 56(pB2), rB2 fmpyfadd,sgl rA0b, rB3b, rC03, rC03 fmpyfadd,sgl rA1b, rB3b, rC13, rC13 fmpyfadd,sgl rA2b, rB3b, rC23, rC23 fmpyfadd,sgl rA3b, rB3b, rC33, rC33 fldd 56(pB3), rB3 fmpyfadd,sgl ra0a, rB0a, rC00, rC00 fmpyfadd,sgl ra1a, rB0a, rC10, rC10 fmpyfadd,sgl ra2a, rB0a, rC20, rC20 fmpyfadd,sgl ra3a, rB0a, rC30, rC30 fldd,mb 64(pA0), rA0 fmpyfadd,sgl ra0a, rB1a, rC01, rC01 fmpyfadd,sgl ra1a, rB1a, rC11, rC11 fmpyfadd,sgl ra2a, rB1a, rC21, rC21 fmpyfadd,sgl ra3a, rB1a, rC31, rC31 fldd,mb 64(pA1), rA1 fmpyfadd,sgl ra0a, rB2a, rC02, rC02 fmpyfadd,sgl ra1a, rB2a, rC12, rC12 fmpyfadd,sgl ra2a, rB2a, rC22, rC22 fmpyfadd,sgl ra3a, rB2a, rC32, rC32 fldd,mb 64(pA2), rA2 fmpyfadd,sgl ra0a, rB3a, rC03, rC03 fmpyfadd,sgl ra1a, rB3a, rC13, rC13 fmpyfadd,sgl ra2a, rB3a, rC23, rC23 fmpyfadd,sgl ra3a, rB3a, rC33, rC33 fldd,mb 64(pA3), rA3 fmpyfadd,sgl ra0b, rB0b, rC00, rC00 fmpyfadd,sgl ra1b, rB0b, rC10, rC10 fmpyfadd,sgl ra2b, rB0b, rC20, rC20 fmpyfadd,sgl ra3b, rB0b, rC30, rC30 fldd,mb 
64(pB0), rB0 fmpyfadd,sgl ra0b, rB1b, rC01, rC01 fmpyfadd,sgl ra1b, rB1b, rC11, rC11 fmpyfadd,sgl ra2b, rB1b, rC21, rC21 fmpyfadd,sgl ra3b, rB1b, rC31, rC31 fldd,mb 64(pB1), rB1 fmpyfadd,sgl ra0b, rB2b, rC02, rC02 fmpyfadd,sgl ra1b, rB2b, rC12, rC12 fmpyfadd,sgl ra2b, rB2b, rC22, rC22 fmpyfadd,sgl ra3b, rB2b, rC32, rC32 fldd,mb 64(pB2), rB2 fmpyfadd,sgl ra0b, rB3b, rC03, rC03 fmpyfadd,sgl ra1b, rB3b, rC13, rC13 fmpyfadd,sgl ra2b, rB3b, rC23, rC23 fmpyfadd,sgl ra3b, rB3b, rC33, rC33;; while (--k);; addib,<> -16, rK, KLOOP fldd,mb 64(pB3), rB3; nop#endif;; Drain pipe; fmpyfadd,sgl rA0a, rB0a, rC00, rC00 fmpyfadd,sgl rA1a, rB0a, rC10, rC10 fmpyfadd,sgl rA2a, rB0a, rC20, rC20 fmpyfadd,sgl rA3a, rB0a, rC30, rC30 fldd 8(pA0), ra0 fmpyfadd,sgl rA0a, rB1a, rC01, rC01 fmpyfadd,sgl rA1a, rB1a, rC11, rC11 fmpyfadd,sgl rA2a, rB1a, rC21, rC21 fmpyfadd,sgl rA3a, rB1a, rC31, rC31 fldd 8(pA1), ra1 fmpyfadd,sgl rA0a, rB2a, rC02, rC02 fmpyfadd,sgl rA1a, rB2a, rC12, rC12 fmpyfadd,sgl rA2a, rB2a, rC22, rC22 fmpyfadd,sgl rA3a, rB2a, rC32, rC32 fldd 8(pA2), ra2 fmpyfadd,sgl rA0a, rB3a, rC03, rC03 fmpyfadd,sgl rA1a, rB3a, rC13, rC13 fmpyfadd,sgl rA2a, rB3a, rC23, rC23 fmpyfadd,sgl rA3a, rB3a, rC33, rC33 fldd 8(pA3), ra3 fmpyfadd,sgl rA0b, rB0b, rC00, rC00 fmpyfadd,sgl rA1b, rB0b, rC10, rC10 fmpyfadd,sgl rA2b, rB0b, rC20, rC20 fmpyfadd,sgl rA3b, rB0b, rC30, rC30 fldd 8(pB0), rB0 fmpyfadd,sgl rA0b, rB1b, rC01, rC01 fmpyfadd,sgl rA1b, rB1b, rC11, rC11 fmpyfadd,sgl rA2b, rB1b, rC21, rC21 fmpyfadd,sgl rA3b, rB1b, rC31, rC31 fldd 8(pB1), rB1 fmpyfadd,sgl rA0b, rB2b, rC02, rC02 fmpyfadd,sgl rA1b, rB2b, rC12, rC12 fmpyfadd,sgl rA2b, rB2b, rC22, rC22 fmpyfadd,sgl rA3b, rB2b, rC32, rC32 fldd 8(pB2), rB2 fmpyfadd,sgl rA0b, rB3b, rC03, rC03 fmpyfadd,sgl rA1b, rB3b, rC13, rC13 fmpyfadd,sgl rA2b, rB3b, rC23, rC23 fmpyfadd,sgl rA3b, rB3b, rC33, rC33 fldd 8(pB3), rB3 fmpyfadd,sgl ra0a, rB0a, rC00, rC00 fmpyfadd,sgl ra1a, rB0a, rC10, rC10 fmpyfadd,sgl ra2a, rB0a, rC20, rC20 fmpyfadd,sgl ra3a, rB0a, rC30, 
rC30 fldd 16(pA0), rA0 fmpyfadd,sgl ra0a, rB1a, rC01, rC01 fmpyfadd,sgl ra1a, rB1a, rC11, rC11 fmpyfadd,sgl ra2a, rB1a, rC21, rC21 fmpyfadd,sgl ra3a, rB1a, rC31, rC31 fldd 16(pA1), rA1 fmpyfadd,sgl ra0a, rB2a, rC02, rC02 fmpyfadd,sgl ra1a, rB2a, rC12, rC12 fmpyfadd,sgl ra2a, rB2a, rC22, rC22 fmpyfadd,sgl ra3a, rB2a, rC32, rC32 fldd 16(pA2), rA2 fmpyfadd,sgl ra0a, rB3a, rC03, rC03 fmpyfadd,sgl ra1a, rB3a, rC13, rC13 fmpyfadd,sgl ra2a, rB3a, rC23, rC23 fmpyfadd,sgl ra3a, rB3a, rC33, rC33 fldd 16(pA3), rA3 fmpyfadd,sgl ra0b, rB0b, rC00, rC00 fmpyfadd,sgl ra1b, rB0b, rC10, rC10 fmpyfadd,sgl ra2b, rB0b, rC20, rC20 fmpyfadd,sgl ra3b, rB0b, rC30, rC30 fldd 16(pB0), rB0 fmpyfadd,sgl ra0b, rB1b, rC01, rC01 fmpyfadd,sgl ra1b, rB1b, rC11, rC11 fmpyfadd,sgl ra2b, rB1b, rC21, rC21 fmpyfadd,sgl ra3b, rB1b, rC31, rC31 fldd 16(pB1), rB1 fmpyfadd,sgl ra0b, rB2b, rC02, rC02 fmpyfadd,sgl ra1b, rB2b, rC12, rC12 fmpyfadd,sgl ra2b, rB2b, rC22, rC22 fmpyfadd,sgl ra3b, rB2b, rC32, rC32 fldd 16(pB2), rB2 fmpyfadd,sgl ra0b, rB3b, rC03, rC03 fmpyfadd,sgl ra1b, rB3b, rC13, rC13 fmpyfadd,sgl ra2b, rB3b, rC23, rC23 fmpyfadd,sgl ra3b, rB3b, rC33, rC33 fldd 16(pB3), rB3 fmpyfadd,sgl rA0a, rB0a, rC00, rC00 fmpyfadd,sgl rA1a, rB0a, rC10, rC10 fmpyfadd,sgl rA2a, rB0a, rC20, rC20 fmpyfadd,sgl rA3a, rB0a, rC30, rC30 fldd 24(pA0), ra0 fmpyfadd,sgl rA0a, rB1a, rC01, rC01 fmpyfadd,sgl rA1a, rB1a, rC11, rC11 fmpyfadd,sgl rA2a, rB1a, rC21, rC21 fmpyfadd,sgl rA3a, rB1a, rC31, rC31 fldd 24(pA1), ra1 fmpyfadd,sgl rA0a, rB2a, rC02, rC02 fmpyfadd,sgl rA1a, rB2a, rC12, rC12 fmpyfadd,sgl rA2a, rB2a, rC22, rC22 fmpyfadd,sgl rA3a, rB2a, rC32, rC32 fldd 24(pA2), ra2 fmpyfadd,sgl rA0a, rB3a, rC03, rC03 fmpyfadd,sgl rA1a, rB3a, rC13, rC13 fmpyfadd,sgl rA2a, rB3a, rC23, rC23 fmpyfadd,sgl rA3a, rB3a, rC33, rC33 fldd 24(pA3), ra3 fmpyfadd,sgl rA0b, rB0b, rC00, rC00 fmpyfadd,sgl rA1b, rB0b, rC10, rC10 fmpyfadd,sgl rA2b, rB0b, rC20, rC20 fmpyfadd,sgl rA3b, rB0b, rC30, rC30 fldd 24(pB0), rB0 fmpyfadd,sgl rA0b, rB1b, rC01, 
rC01 fmpyfadd,sgl rA1b, rB1b, rC11, rC11 fmpyfadd,sgl rA2b, rB1b, rC21, rC21 fmpyfadd,sgl rA3b, rB1b, rC31, rC31 fldd 24(pB1), rB1 fmpyfadd,sgl rA0b, rB2b, rC02, rC02 fmpyfadd,sgl rA1b, rB2b, rC12, rC12 fmpyfadd,sgl rA2b, rB2b, rC22, rC22 fmpyfadd,sgl rA3b, rB2b, rC32, rC32 fldd 24(pB2), rB2 fmpyfadd,sgl rA0b, rB3b, rC03, rC03 fmpyfadd,sgl rA1b, rB3b, rC13, rC13 fmpyfadd,sgl rA2b, rB3b, rC23, rC23 fmpyfadd,sgl rA3b, rB3b, rC33, rC33 fldd 24(pB3), rB3 fmpyfadd,sgl ra0a, rB0a, rC00, rC00 fmpyfadd,sgl ra1a, rB0a, rC10, rC10 fmpyfadd,sgl ra2a, rB0a, rC20, rC20 fmpyfadd,sgl ra3a, rB0a, rC30, rC30 fldd 32(pA0), rA0 fmpyfadd,sgl ra0a, rB1a, rC01, rC01 fmpyfadd,sgl ra1a, rB1a, rC11, rC11 fmpyfadd,sgl ra2a, rB1a, rC21, rC21 fmpyfadd,sgl ra3a, rB1a, rC31, rC31 fldd 32(pA1), rA1 fmpyfadd,sgl ra0a, rB2a, rC02, rC02 fmpyfadd,sgl ra1a, rB2a, rC12, rC12 fmpyfadd,sgl ra2a, rB2a, rC22, rC22 fmpyfadd,sgl ra3a, rB2a, rC32, rC32 fldd 32(pA2), rA2 fmpyfadd,sgl ra0a, rB3a, rC03, rC03 fmpyfadd,sgl ra1a, rB3a, rC13, rC13 fmpyfadd,sgl ra2a, rB3a, rC23, rC23 fmpyfadd,sgl ra3a, rB3a, rC33, rC33 fldd 32(pA3), rA3 fmpyfadd,sgl ra0b, rB0b, rC00, rC00 fmpyfadd,sgl ra1b, rB0b, rC10, rC10 fmpyfadd,sgl ra2b, rB0b, rC20, rC20 fmpyfadd,sgl ra3b, rB0b, rC30, rC30 fldd 32(pB0), rB0 fmpyfadd,sgl ra0b, rB1b, rC01, rC01 fmpyfadd,sgl ra1b, rB1b, rC11, rC11 fmpyfadd,sgl ra2b, rB1b, rC21, rC21 fmpyfadd,sgl ra3b, rB1b, rC31, rC31 fldd 32(pB1), rB1 fmpyfadd,sgl ra0b, rB2b, rC02, rC02 fmpyfadd,sgl ra1b, rB2b, rC12, rC12 fmpyfadd,sgl ra2b, rB2b, rC22, rC22 fmpyfadd,sgl ra3b, rB2b, rC32, rC32 fldd 32(pB2), rB2 fmpyfadd,sgl ra0b, rB3b, rC03, rC03 fmpyfadd,sgl ra1b, rB3b, rC13, rC13 fmpyfadd,sgl ra2b, rB3b, rC23, rC23 fmpyfadd,sgl ra3b, rB3b, rC33, rC33 fldd 32(pB3), rB3 fmpyfadd,sgl rA0a, rB0a, rC00, rC00 fmpyfadd,sgl rA1a, rB0a, rC10, rC10 fmpyfadd,sgl rA2a, rB0a, rC20, rC20 fmpyfadd,sgl rA3a, rB0a, rC30, rC30 fldd 40(pA0), ra0 fmpyfadd,sgl rA0a, rB1a, rC01, rC01 fmpyfadd,sgl rA1a, rB1a, rC11, rC11 fmpyfadd,sgl 
rA2a, rB1a, rC21, rC21 fmpyfadd,sgl rA3a, rB1a, rC31, rC31 fldd 40(pA1), ra1 fmpyfadd,sgl rA0a, rB2a, rC02, rC02 fmpyfadd,sgl rA1a, rB2a, rC12, rC12 fmpyfadd,sgl rA2a, rB2a, rC22, rC22 fmpyfadd,sgl rA3a, rB2a, rC32, rC32 fldd 40(pA2), ra2 fmpyfadd,sgl rA0a, rB3a, rC03, rC03 fmpyfadd,sgl rA1a, rB3a, rC13, rC13 fmpyfadd,sgl rA2a, rB3a, rC23, rC23 fmpyfadd,sgl rA3a, rB3a, rC33, rC33 fldd 40(pA3), ra3 fmpyfadd,sgl rA0b, rB0b, rC00, rC00 fmpyfadd,sgl rA1b, rB0b, rC10, rC10 fmpyfadd,sgl rA2b, rB0b, rC20, rC20 fmpyfadd,sgl rA3b, rB0b, rC30, rC30 fldd 40(pB0), rB0 fmpyfadd,sgl rA0b, rB1b, rC01, rC01 fmpyfadd,sgl rA1b, rB1b, rC11, rC11 fmpyfadd,sgl rA2b, rB1b, rC21, rC21 fmpyfadd,sgl rA3b, rB1b, rC31, rC31 fldd 40(pB1), rB1 fmpyfadd,sgl rA0b, rB2b, rC02, rC02 fmpyfadd,sgl rA1b, rB2b, rC12, rC12 fmpyfadd,sgl rA2b, rB2b, rC22, rC22 fmpyfadd,sgl rA3b, rB2b, rC32, rC32 fldd 40(pB2), rB2 fmpyfadd,sgl rA0b, rB3b, rC03, rC03 fmpyfadd,sgl rA1b, rB3b, rC13, rC13 fmpyfadd,sgl rA2b, rB3b, rC23, rC23 fmpyfadd,sgl rA3b, rB3b, rC33, rC33 fldd 40(pB3), rB3 fmpyfadd,sgl ra0a, rB0a, rC00, rC00 fmpyfadd,sgl ra1a, rB0a, rC10, rC10 fmpyfadd,sgl ra2a, rB0a, rC20, rC20 fmpyfadd,sgl ra3a, rB0a, rC30, rC30 fldd 48(pA0), rA0 fmpyfadd,sgl ra0a, rB1a, rC01, rC01 fmpyfadd,sgl ra1a, rB1a, rC11, rC11 fmpyfadd,sgl ra2a, rB1a, rC21, rC21 fmpyfadd,sgl ra3a, rB1a, rC31, rC31 fldd 48(pA1), rA1 fmpyfadd,sgl ra0a, rB2a, rC02, rC02 fmpyfadd,sgl ra1a, rB2a, rC12, rC12 fmpyfadd,sgl ra2a, rB2a, rC22, rC22 fmpyfadd,sgl ra3a, rB2a, rC32, rC32 fldd 48(pA2), rA2 fmpyfadd,sgl ra0a, rB3a, rC03, rC03 fmpyfadd,sgl ra1a, rB3a, rC13, rC13 fmpyfadd,sgl ra2a, rB3a, rC23, rC23 fmpyfadd,sgl ra3a, rB3a, rC33, rC33 fldd 48(pA3), rA3 fmpyfadd,sgl ra0b, rB0b, rC00, rC00 fmpyfadd,sgl ra1b, rB0b, rC10, rC10 fmpyfadd,sgl ra2b, rB0b, rC20, rC20 fmpyfadd,sgl ra3b, rB0b, rC30, rC30 fldd 48(pB0), rB0 fmpyfadd,sgl ra0b, rB1b, rC01, rC01 fmpyfadd,sgl ra1b, rB1b, rC11, rC11 fmpyfadd,sgl ra2b, rB1b, rC21, rC21 fmpyfadd,sgl ra3b, rB1b, rC31, 
rC31 fldd 48(pB1), rB1 fmpyfadd,sgl ra0b, rB2b, rC02, rC02 fmpyfadd,sgl ra1b, rB2b, rC12, rC12 fmpyfadd,sgl ra2b, rB2b, rC22, rC22 fmpyfadd,sgl ra3b, rB2b, rC32, rC32 fldd 48(pB2), rB2 fmpyfadd,sgl ra0b, rB3b, rC03, rC03 fmpyfadd,sgl ra1b, rB3b, rC13, rC13 fmpyfadd,sgl ra2b, rB3b, rC23, rC23 fmpyfadd,sgl ra3b, rB3b, rC33, rC33 fldd 48(pB3), rB3 fmpyfadd,sgl rA0a, rB0a, rC00, rC00 fmpyfadd,sgl rA1a, rB0a, rC10, rC10 fmpyfadd,sgl rA2a, rB0a, rC20, rC20 fmpyfadd,sgl rA3a, rB0a, rC30, rC30 fldd 56(pA0), ra0 fmpyfadd,sgl rA0a, rB1a, rC01, rC01 fmpyfadd,sgl rA1a, rB1a, rC11, rC11 fmpyfadd,sgl rA2a, rB1a, rC21, rC21 fmpyfadd,sgl rA3a, rB1a, rC31, rC31 fldd 56(pA1), ra1 fmpyfadd,sgl rA0a, rB2a, rC02, rC02 fmpyfadd,sgl rA1a, rB2a, rC12, rC12 fmpyfadd,sgl rA2a, rB2a, rC22, rC22 fmpyfadd,sgl rA3a, rB2a, rC32, rC32 fldd 56(pA2), ra2 fmpyfadd,sgl rA0a, rB3a, rC03, rC03 fmpyfadd,sgl rA1a, rB3a, rC13, rC13 fmpyfadd,sgl rA2a, rB3a, rC23, rC23 fmpyfadd,sgl rA3a, rB3a, rC33, rC33 fldd 56(pA3), ra3 fmpyfadd,sgl rA0b, rB0b, rC00, rC00 fmpyfadd,sgl rA1b, rB0b, rC10, rC10 fmpyfadd,sgl rA2b, rB0b, rC20, rC20 fmpyfadd,sgl rA3b, rB0b, rC30, rC30 fldd 56(pB0), rB0 fmpyfadd,sgl rA0b, rB1b, rC01, rC01 fmpyfadd,sgl rA1b, rB1b, rC11, rC11 fmpyfadd,sgl rA2b, rB1b, rC21, rC21 fmpyfadd,sgl rA3b, rB1b, rC31, rC31 fldd 56(pB1), rB1 fmpyfadd,sgl rA0b, rB2b, rC02, rC02 fmpyfadd,sgl rA1b, rB2b, rC12, rC12 fmpyfadd,sgl rA2b, rB2b, rC22, rC22 fmpyfadd,sgl rA3b, rB2b, rC32, rC32 fldd 56(pB2), rB2 fmpyfadd,sgl rA0b, rB3b, rC03, rC03 fmpyfadd,sgl rA1b, rB3b, rC13, rC13 fmpyfadd,sgl rA2b, rB3b, rC23, rC23 fmpyfadd,sgl rA3b, rB3b, rC33, rC33 fldd 56(pB3), rB3 fmpyfadd,sgl ra0a, rB0a, rC00, rC00 fmpyfadd,sgl ra1a, rB0a, rC10, rC10 ldo 4*3*KB+64(pA0), pA0 fmpyfadd,sgl ra2a, rB0a, rC20, rC20 fmpyfadd,sgl ra3a, rB0a, rC30, rC30 fmpyfadd,sgl ra0a, rB1a, rC01, rC01 fmpyfadd,sgl ra1a, rB1a, rC11, rC11 ldo 4*3*KB+64(pA1), pA1 fmpyfadd,sgl ra2a, rB1a, rC21, rC21 fmpyfadd,sgl ra3a, rB1a, rC31, rC31 ldw,ma 64(pfA), %r0 
fmpyfadd,sgl ra0a, rB2a, rC02, rC02 fmpyfadd,sgl ra1a, rB2a, rC12, rC12 ldo 4*3*KB+64(pA2), pA2 fmpyfadd,sgl ra2a, rB2a, rC22, rC22 fmpyfadd,sgl ra3a, rB2a, rC32, rC32 fmpyfadd,sgl ra0a, rB3a, rC03, rC03 fmpyfadd,sgl ra1a, rB3a, rC13, rC13 ldo 4*3*KB+64(pA3), pA3 fmpyfadd,sgl ra2a, rB3a, rC23, rC23 fmpyfadd,sgl ra3a, rB3a, rC33, rC33 fmpyfadd,sgl ra0b, rB0b, rC00, rC00 fmpyfadd,sgl ra1b, rB0b, rC10, rC10 ldo -4*(KB-16)(pB0), pB0 fmpyfadd,sgl ra2b, rB0b, rC20, rC20 fmpyfadd,sgl ra3b, rB0b, rC30, rC30 ldw,ma 64(pfB), %r0 fmpyfadd,sgl ra0b, rB1b, rC01, rC01 fmpyfadd,sgl ra1b, rB1b, rC11, rC11 ldd 64(pC0), %r0 fmpyfadd,sgl ra2b, rB1b, rC21, rC21 fmpyfadd,sgl ra3b, rB1b, rC31, rC31 ldd 64(pC1), %r0 fmpyfadd,sgl ra0b, rB2b, rC02, rC02 fmpyfadd,sgl ra1b, rB2b, rC12, rC12 ldo -4*(KB-16)(pB1), pB1 fmpyfadd,sgl ra2b, rB2b, rC22, rC22 fmpyfadd,sgl ra3b, rB2b, rC32, rC32 ldd 64(pC2), %r0 fmpyfadd,sgl ra0b, rB3b, rC03, rC03 fmpyfadd,sgl ra1b, rB3b, rC13, rC13 ldo -4*(KB-16)(pB2), pB2 fmpyfadd,sgl ra2b, rB3b, rC23, rC23 fmpyfadd,sgl ra3b, rB3b, rC33, rC33 ldo -4*(KB-16)(pB3), pB3 ldd 64(pC3), %r0;; end drain KLOOP;;; Write to C; fstw rC00, 0(pC0) fstw rC10, CMUL(4)(pC0) fstw rC20, CMUL(8)(pC0) fstw rC30, CMUL(12)(pC0) ldo CMUL(16)(pC0), pC0 fstw rC01, 0(pC1) fstw rC11, CMUL(4)(pC1) fstw rC21, CMUL(8)(pC1) fstw rC31, CMUL(12)(pC1) ldo CMUL(16)(pC1), pC1 fstw rC02, 0(pC2) fstw rC12, CMUL(4)(pC2) fstw rC22, CMUL(8)(pC2) fstw rC32, CMUL(12)(pC2) ldo CMUL(16)(pC2), pC2 fstw rC03, 0(pC3) fstw rC13, CMUL(4)(pC3) fstw rC23, CMUL(8)(pC3) fstw rC33, CMUL(12)(pC3);; while (M);;; ldo -6(rM), rM; cmpib,<> 0, rM, MLOOP addib,<> -4, rM, MLOOP ldo CMUL(16)(pC3), pC3 ldo 4*4*KB(pB0), pB0 ldo 4*4*KB(pB1), pB1 ldo 4*4*KB(pB2), pB2 ldo 4*4*KB(pB3), pB3 sub pA0, incAn, pA0 sub pA1, incAn, pA1 sub pA2, incAn, pA2 sub pA3, incAn, pA3 add pC0, incCn, pC0 add pC1, incCn, pC1 add pC2, incCn, pC2;; while (N);; addib,<> -4, rN, NLOOP add pC3, incCn, pC3/* * Restore regs and return */ fldd -8(%r30), %fr12 
fldd -16(%r30), %fr13 fldd -24(%r30), %fr14 fldd -32(%r30), %fr15 fldd -40(%r30), %fr16 fldd -48(%r30), %fr17 fldd -56(%r30), %fr18 fldd -64(%r30), %fr19 fldd -72(%r30), %fr20 fldd -80(%r30), %fr21 ldw -84(%r30), %r3 ldw -88(%r30), %r4 ldw -92(%r30), %r5 ldw -96(%r30), %r6 ldw -100(%r30), %r7 ldw -104(%r30), %r8 ldw -108(%r30), %r9 ldw -112(%r30), %r10 ldw -116(%r30), %r11 ldw -120(%r30), %r12 ldw -124(%r30), %r13 bve (%r2) ldo -FSIZE(%r30), %r30 .EXIT .PROCEND
⌨️ 快捷键说明
复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?