atl_dmm4x4x16_hppa.c
来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 1,251 行 · 第 1/3 页
C
1,251 行
fmpyfadd,dbl ra0, rB1, rC01, rC01 fmpyfadd,dbl ra1, rB1, rC11, rC11 fldd 48(pA1), rA1 fmpyfadd,dbl ra2, rB1, rC21, rC21 fmpyfadd,dbl ra3, rB1, rC31, rC31 fldd 48(pB1), rB1 fmpyfadd,dbl ra0, rB2, rC02, rC02 fmpyfadd,dbl ra1, rB2, rC12, rC12 fldd 48(pA2), rA2 fmpyfadd,dbl ra2, rB2, rC22, rC22 fmpyfadd,dbl ra3, rB2, rC32, rC32 fldd 48(pB2), rB2 fmpyfadd,dbl ra0, rB3, rC03, rC03 fmpyfadd,dbl ra1, rB3, rC13, rC13 fldd 48(pA3), rA3 fmpyfadd,dbl ra2, rB3, rC23, rC23 fmpyfadd,dbl ra3, rB3, rC33, rC33 fldd 48(pB3), rB3 fmpyfadd,dbl rA0, rB0, rC00, rC00 fmpyfadd,dbl rA1, rB0, rC10, rC10 fldd 56(pA0), ra0 fmpyfadd,dbl rA2, rB0, rC20, rC20 fmpyfadd,dbl rA3, rB0, rC30, rC30 fldd 56(pB0), rB0 fmpyfadd,dbl rA0, rB1, rC01, rC01 fmpyfadd,dbl rA1, rB1, rC11, rC11 fldd 56(pA1), ra1 fmpyfadd,dbl rA2, rB1, rC21, rC21 fmpyfadd,dbl rA3, rB1, rC31, rC31 fldd 56(pB1), rB1 fmpyfadd,dbl rA0, rB2, rC02, rC02 fmpyfadd,dbl rA1, rB2, rC12, rC12 fldd 56(pA2), ra2 fmpyfadd,dbl rA2, rB2, rC22, rC22 fmpyfadd,dbl rA3, rB2, rC32, rC32 fldd 56(pB2), rB2 fmpyfadd,dbl rA0, rB3, rC03, rC03 fmpyfadd,dbl rA1, rB3, rC13, rC13 fldd 56(pA3), ra3 fmpyfadd,dbl rA2, rB3, rC23, rC23 fmpyfadd,dbl rA3, rB3, rC33, rC33 fldd 56(pB3), rB3 fmpyfadd,dbl ra0, rB0, rC00, rC00 fmpyfadd,dbl ra1, rB0, rC10, rC10 fldd 64(pA0), rA0 fmpyfadd,dbl ra2, rB0, rC20, rC20 fmpyfadd,dbl ra3, rB0, rC30, rC30 fldd 64(pB0), rB0 fmpyfadd,dbl ra0, rB1, rC01, rC01 fmpyfadd,dbl ra1, rB1, rC11, rC11 fldd 64(pA1), rA1 fmpyfadd,dbl ra2, rB1, rC21, rC21 fmpyfadd,dbl ra3, rB1, rC31, rC31 fldd 64(pB1), rB1 fmpyfadd,dbl ra0, rB2, rC02, rC02 fmpyfadd,dbl ra1, rB2, rC12, rC12 fldd 64(pA2), rA2 fmpyfadd,dbl ra2, rB2, rC22, rC22 fmpyfadd,dbl ra3, rB2, rC32, rC32 fldd 64(pB2), rB2 fmpyfadd,dbl ra0, rB3, rC03, rC03 fmpyfadd,dbl ra1, rB3, rC13, rC13 fldd 64(pA3), rA3 fmpyfadd,dbl ra2, rB3, rC23, rC23 fmpyfadd,dbl ra3, rB3, rC33, rC33 fldd 64(pB3), rB3 fmpyfadd,dbl rA0, rB0, rC00, rC00 fmpyfadd,dbl rA1, rB0, rC10, rC10 fldd 72(pA0), ra0 fmpyfadd,dbl rA2, rB0, rC20, rC20 fmpyfadd,dbl rA3, rB0, rC30, rC30 fldd 72(pB0), rB0 fmpyfadd,dbl rA0, rB1, rC01, rC01 fmpyfadd,dbl rA1, rB1, rC11, rC11 fldd 72(pA1), ra1 fmpyfadd,dbl rA2, rB1, rC21, rC21 fmpyfadd,dbl rA3, rB1, rC31, rC31 fldd 72(pB1), rB1 fmpyfadd,dbl rA0, rB2, rC02, rC02 fmpyfadd,dbl rA1, rB2, rC12, rC12 fldd 72(pA2), ra2 fmpyfadd,dbl rA2, rB2, rC22, rC22 fmpyfadd,dbl rA3, rB2, rC32, rC32 fldd 72(pB2), rB2 fmpyfadd,dbl rA0, rB3, rC03, rC03 fmpyfadd,dbl rA1, rB3, rC13, rC13 fldd 72(pA3), ra3 fmpyfadd,dbl rA2, rB3, rC23, rC23 fmpyfadd,dbl rA3, rB3, rC33, rC33 fldd 72(pB3), rB3 fmpyfadd,dbl ra0, rB0, rC00, rC00 fmpyfadd,dbl ra1, rB0, rC10, rC10 fldd 80(pA0), rA0 fmpyfadd,dbl ra2, rB0, rC20, rC20 fmpyfadd,dbl ra3, rB0, rC30, rC30 fldd 80(pB0), rB0 fmpyfadd,dbl ra0, rB1, rC01, rC01 fmpyfadd,dbl ra1, rB1, rC11, rC11 fldd 80(pA1), rA1 fmpyfadd,dbl ra2, rB1, rC21, rC21 fmpyfadd,dbl ra3, rB1, rC31, rC31 fldd 80(pB1), rB1 fmpyfadd,dbl ra0, rB2, rC02, rC02 fmpyfadd,dbl ra1, rB2, rC12, rC12 fldd 80(pA2), rA2 fmpyfadd,dbl ra2, rB2, rC22, rC22 fmpyfadd,dbl ra3, rB2, rC32, rC32 fldd 80(pB2), rB2 fmpyfadd,dbl ra0, rB3, rC03, rC03 fmpyfadd,dbl ra1, rB3, rC13, rC13 fldd 80(pA3), rA3 fmpyfadd,dbl ra2, rB3, rC23, rC23 fmpyfadd,dbl ra3, rB3, rC33, rC33 fldd 80(pB3), rB3 fmpyfadd,dbl rA0, rB0, rC00, rC00 fmpyfadd,dbl rA1, rB0, rC10, rC10 fldd 88(pA0), ra0 fmpyfadd,dbl rA2, rB0, rC20, rC20 fmpyfadd,dbl rA3, rB0, rC30, rC30 fldd 88(pB0), rB0 fmpyfadd,dbl rA0, rB1, rC01, rC01 fmpyfadd,dbl rA1, rB1, rC11, rC11 fldd 88(pA1), ra1 fmpyfadd,dbl rA2, rB1, rC21, rC21 fmpyfadd,dbl rA3, rB1, rC31, rC31 fldd 88(pB1), rB1 fmpyfadd,dbl rA0, rB2, rC02, rC02 fmpyfadd,dbl rA1, rB2, rC12, rC12 fldd 88(pA2), ra2 fmpyfadd,dbl rA2, rB2, rC22, rC22 fmpyfadd,dbl rA3, rB2, rC32, rC32 fldd 88(pB2), rB2 fmpyfadd,dbl rA0, rB3, rC03, rC03 fmpyfadd,dbl rA1, rB3, rC13, rC13 fldd 88(pA3), ra3 fmpyfadd,dbl rA2, rB3, rC23, rC23 fmpyfadd,dbl rA3, rB3, rC33, rC33 fldd 88(pB3), rB3 fmpyfadd,dbl ra0, rB0, rC00, rC00 fmpyfadd,dbl ra1, rB0, rC10, rC10 fldd 96(pA0), rA0 fmpyfadd,dbl ra2, rB0, rC20, rC20 fmpyfadd,dbl ra3, rB0, rC30, rC30 fldd 96(pB0), rB0 fmpyfadd,dbl ra0, rB1, rC01, rC01 fmpyfadd,dbl ra1, rB1, rC11, rC11 fldd 96(pA1), rA1 fmpyfadd,dbl ra2, rB1, rC21, rC21 fmpyfadd,dbl ra3, rB1, rC31, rC31 fldd 96(pB1), rB1 fmpyfadd,dbl ra0, rB2, rC02, rC02 fmpyfadd,dbl ra1, rB2, rC12, rC12 fldd 96(pA2), rA2 fmpyfadd,dbl ra2, rB2, rC22, rC22 fmpyfadd,dbl ra3, rB2, rC32, rC32 fldd 96(pB2), rB2 fmpyfadd,dbl ra0, rB3, rC03, rC03 fmpyfadd,dbl ra1, rB3, rC13, rC13 fldd 96(pA3), rA3 fmpyfadd,dbl ra2, rB3, rC23, rC23 fmpyfadd,dbl ra3, rB3, rC33, rC33 fldd 96(pB3), rB3 fmpyfadd,dbl rA0, rB0, rC00, rC00 fmpyfadd,dbl rA1, rB0, rC10, rC10 fldd 104(pA0), ra0 fmpyfadd,dbl rA2, rB0, rC20, rC20 fmpyfadd,dbl rA3, rB0, rC30, rC30 fldd 104(pB0), rB0 fmpyfadd,dbl rA0, rB1, rC01, rC01 fmpyfadd,dbl rA1, rB1, rC11, rC11 fldd 104(pA1), ra1 fmpyfadd,dbl rA2, rB1, rC21, rC21 fmpyfadd,dbl rA3, rB1, rC31, rC31 fldd 104(pB1), rB1 fmpyfadd,dbl rA0, rB2, rC02, rC02 fmpyfadd,dbl rA1, rB2, rC12, rC12 fldd 104(pA2), ra2 fmpyfadd,dbl rA2, rB2, rC22, rC22 fmpyfadd,dbl rA3, rB2, rC32, rC32 fldd 104(pB2), rB2 fmpyfadd,dbl rA0, rB3, rC03, rC03 fmpyfadd,dbl rA1, rB3, rC13, rC13 fldd 104(pA3), ra3 fmpyfadd,dbl rA2, rB3, rC23, rC23 fmpyfadd,dbl rA3, rB3, rC33, rC33 fldd 104(pB3), rB3 fmpyfadd,dbl ra0, rB0, rC00, rC00 fmpyfadd,dbl ra1, rB0, rC10, rC10 fldd 112(pA0), rA0 fmpyfadd,dbl ra2, rB0, rC20, rC20 fmpyfadd,dbl ra3, rB0, rC30, rC30 fldd 112(pB0), rB0 fmpyfadd,dbl ra0, rB1, rC01, rC01 fmpyfadd,dbl ra1, rB1, rC11, rC11 fldd 112(pA1), rA1 fmpyfadd,dbl ra2, rB1, rC21, rC21 fmpyfadd,dbl ra3, rB1, rC31, rC31 fldd 112(pB1), rB1 fmpyfadd,dbl ra0, rB2, rC02, rC02 fmpyfadd,dbl ra1, rB2, rC12, rC12 fldd 112(pA2), rA2 fmpyfadd,dbl ra2, rB2, rC22, rC22 fmpyfadd,dbl ra3, rB2, rC32, rC32 fldd 112(pB2), rB2 fmpyfadd,dbl ra0, rB3, rC03, rC03 fmpyfadd,dbl ra1, rB3, rC13, rC13 fldd 112(pA3), rA3 fmpyfadd,dbl ra2, rB3, rC23, rC23 fmpyfadd,dbl ra3, rB3, rC33, rC33 fldd 112(pB3), rB3 fmpyfadd,dbl rA0, rB0, rC00, rC00 fmpyfadd,dbl rA1, rB0, rC10, rC10 fldd 120(pA0), ra0 fmpyfadd,dbl rA2, rB0, rC20, rC20 fmpyfadd,dbl rA3, rB0, rC30, rC30 fldd 120(pB0), rB0 fmpyfadd,dbl rA0, rB1, rC01, rC01 fmpyfadd,dbl rA1, rB1, rC11, rC11 fldd 120(pA1), ra1 fmpyfadd,dbl rA2, rB1, rC21, rC21 fmpyfadd,dbl rA3, rB1, rC31, rC31 fldd 120(pB1), rB1 fmpyfadd,dbl rA0, rB2, rC02, rC02 fmpyfadd,dbl rA1, rB2, rC12, rC12 fldd 120(pA2), ra2 fmpyfadd,dbl rA2, rB2, rC22, rC22 fmpyfadd,dbl rA3, rB2, rC32, rC32 fldd 120(pB2), rB2 fmpyfadd,dbl rA0, rB3, rC03, rC03 fmpyfadd,dbl rA1, rB3, rC13, rC13 fldd 120(pA3), ra3 fmpyfadd,dbl rA2, rB3, rC23, rC23 fmpyfadd,dbl rA3, rB3, rC33, rC33 fldd 120(pB3), rB3 fmpyfadd,dbl ra0, rB0, rC00, rC00 fmpyfadd,dbl ra1, rB0, rC10, rC10 fldd,mb 128(pA0), rA0 fmpyfadd,dbl ra2, rB0, rC20, rC20 fmpyfadd,dbl ra3, rB0, rC30, rC30 fldd,mb 128(pB0), rB0 fmpyfadd,dbl ra0, rB1, rC01, rC01 fmpyfadd,dbl ra1, rB1, rC11, rC11 fldd,mb 128(pA1), rA1 fmpyfadd,dbl ra2, rB1, rC21, rC21 fmpyfadd,dbl ra3, rB1, rC31, rC31 fldd,mb 128(pB1), rB1 fmpyfadd,dbl ra0, rB2, rC02, rC02 fmpyfadd,dbl ra1, rB2, rC12, rC12 fldd,mb 128(pA2), rA2 fmpyfadd,dbl ra2, rB2, rC22, rC22 fmpyfadd,dbl ra3, rB2, rC32, rC32 fldd,mb 128(pB2), rB2 fmpyfadd,dbl ra0, rB3, rC03, rC03 fmpyfadd,dbl ra1, rB3, rC13, rC13 fldd,mb 128(pA3), rA3 fmpyfadd,dbl ra2, rB3, rC23, rC23 fmpyfadd,dbl ra3, rB3, rC33, rC33;; while (--k);; addib,<> -16, rK, KLOOP fldd,mb 128(pB3), rB3#endif;; Drain pipe; fmpyfadd,dbl rA0, rB0, rC00, rC00 fmpyfadd,dbl rA1, rB0, rC10, rC10 fldd 8(pA0), ra0 fmpyfadd,dbl rA2, rB0, rC20, rC20 fmpyfadd,dbl rA3, rB0, rC30, rC30 fldd 8(pB0), rB0 fmpyfadd,dbl rA0, rB1, rC01, rC01 fmpyfadd,dbl rA1, rB1, rC11, rC11 fldd 8(pA1), ra1 fmpyfadd,dbl rA2, rB1, rC21, rC21 fmpyfadd,dbl rA3, rB1, rC31, rC31 fldd 8(pB1), rB1 fmpyfadd,dbl rA0, rB2, rC02, rC02 fmpyfadd,dbl rA1, rB2, rC12, rC12 fldd 8(pA2), ra2 fmpyfadd,dbl rA2, rB2, rC22, rC22 fmpyfadd,dbl rA3, rB2, rC32, rC32 fldd 8(pB2), rB2 fmpyfadd,dbl rA0, rB3, rC03, rC03 fmpyfadd,dbl rA1, rB3, rC13, rC13 fldd 8(pA3), ra3 fmpyfadd,dbl rA2, rB3, rC23, rC23 fmpyfadd,dbl rA3, rB3, rC33, rC33 fldd 8(pB3), rB3 fmpyfadd,dbl ra0, rB0, rC00, rC00 fmpyfadd,dbl ra1, rB0, rC10, rC10 fldd 16(pA0), rA0 fmpyfadd,dbl ra2, rB0, rC20, rC20 fmpyfadd,dbl ra3, rB0, rC30, rC30 fldd 16(pB0), rB0 fmpyfadd,dbl ra0, rB1, rC01, rC01 fmpyfadd,dbl ra1, rB1, rC11, rC11 fldd 16(pA1), rA1 fmpyfadd,dbl ra2, rB1, rC21, rC21 fmpyfadd,dbl ra3, rB1, rC31, rC31 fldd 16(pB1), rB1 fmpyfadd,dbl ra0, rB2, rC02, rC02 fmpyfadd,dbl ra1, rB2, rC12, rC12 fldd 16(pA2), rA2 fmpyfadd,dbl ra2, rB2, rC22, rC22 fmpyfadd,dbl ra3, rB2, rC32, rC32 fldd 16(pB2), rB2 fmpyfadd,dbl ra0, rB3, rC03, rC03 fmpyfadd,dbl ra1, rB3, rC13, rC13 fldd 16(pA3), rA3 fmpyfadd,dbl ra2, rB3, rC23, rC23 fmpyfadd,dbl ra3, rB3, rC33, rC33 fldd 16(pB3), rB3 fmpyfadd,dbl rA0, rB0, rC00, rC00 fmpyfadd,dbl rA1, rB0, rC10, rC10 fldd 24(pA0), ra0 fmpyfadd,dbl rA2, rB0, rC20, rC20 fmpyfadd,dbl rA3, rB0, rC30, rC30 fldd 24(pB0), rB0 fmpyfadd,dbl rA0, rB1, rC01, rC01 fmpyfadd,dbl rA1, rB1, rC11, rC11 fldd 24(pA1), ra1 fmpyfadd,dbl rA2, rB1, rC21, rC21 fmpyfadd,dbl rA3, rB1, rC31, rC31 fldd 24(pB1), rB1 fmpyfadd,dbl rA0, rB2, rC02, rC02 fmpyfadd,dbl rA1, rB2, rC12, rC12 fldd 24(pA2), ra2 fmpyfadd,dbl rA2, rB2, rC22, rC22 fmpyfadd,dbl rA3, rB2, rC32, rC32 fldd 24(pB2), rB2 fmpyfadd,dbl rA0, rB3, rC03, rC03 fmpyfadd,dbl rA1, rB3, rC13, rC13 fldd 24(pA3), ra3 fmpyfadd,dbl rA2, rB3, rC23, rC23 fmpyfadd,dbl rA3, rB3, rC33, rC33 fldd 24(pB3), rB3 fmpyfadd,dbl ra0, rB0, rC00, rC00 fmpyfadd,dbl ra1, rB0, rC10, rC10 fldd 32(pA0), rA0 fmpyfadd,dbl ra2, rB0, rC20, rC20 fmpyfadd,dbl ra3, rB0, rC30, rC30 fldd 32(pB0), rB0 fmpyfadd,dbl ra0, rB1, rC01, rC01 fmpyfadd,dbl ra1, rB1, rC11, rC11 fldd 32(pA1), rA1 fmpyfadd,dbl ra2, rB1, rC21, rC21 fmpyfadd,dbl ra3, rB1, rC31, rC31 fldd 32(pB1), rB1 fmpyfadd,dbl ra0, rB2, rC02, rC02 fmpyfadd,dbl ra1, rB2, rC12, rC12 fldd 32(pA2), rA2 fmpyfadd,dbl ra2, rB2, rC22, rC22 fmpyfadd,dbl ra3, rB2, rC32, rC32 fldd 32(pB2), rB2 fmpyfadd,dbl ra0, rB3, rC03, rC03 fmpyfadd,dbl ra1, rB3, rC13, rC13 fldd 32(pA3), rA3
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?