atl_dmm4x4xur3_mips.c
来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 2,389 行 · 第 1/5 页
C
2,389 行
madd.d rC22, rC22, ra2, rB2 madd.d rC32, rC32, ra3, rB2 ldc1 rB2, 472(pB2) madd.d rC03, rC03, ra0, rB3 madd.d rC13, rC13, ra1, rB3 ldc1 rA3, 480(pA3) madd.d rC23, rC23, ra2, rB3 madd.d rC33, rC33, ra3, rB3 ldc1 rB3, 472(pB3) madd.d rC00, rC00, rz0, rB0 madd.d rC10, rC10, rz1, rB0 ldc1 ra0, 488(pA0) madd.d rC20, rC20, rz2, rB0 madd.d rC30, rC30, rz3, rB0 ldc1 rB0, 480(pB0) madd.d rC01, rC01, rz0, rB1 madd.d rC11, rC11, rz1, rB1 ldc1 ra1, 488(pA1) madd.d rC21, rC21, rz2, rB1 madd.d rC31, rC31, rz3, rB1 ldc1 rB1, 480(pB1) madd.d rC02, rC02, rz0, rB2 madd.d rC12, rC12, rz1, rB2 ldc1 ra2, 488(pA2) madd.d rC22, rC22, rz2, rB2 madd.d rC32, rC32, rz3, rB2 ldc1 rB2, 480(pB2) madd.d rC03, rC03, rz0, rB3 madd.d rC13, rC13, rz1, rB3 ldc1 ra3, 488(pA3) madd.d rC23, rC23, rz2, rB3 madd.d rC33, rC33, rz3, rB3 ldc1 rB3, 480(pB3)#endif#if KB > 63 madd.d rC00, rC00, rA0, rB0 madd.d rC10, rC10, rA1, rB0 ldc1 rz0, 496(pA0) madd.d rC20, rC20, rA2, rB0 madd.d rC30, rC30, rA3, rB0 ldc1 rB0, 488(pB0) madd.d rC01, rC01, rA0, rB1 madd.d rC11, rC11, rA1, rB1 ldc1 rz1, 496(pA1) madd.d rC21, rC21, rA2, rB1 madd.d rC31, rC31, rA3, rB1 ldc1 rB1, 488(pB1) madd.d rC02, rC02, rA0, rB2 madd.d rC12, rC12, rA1, rB2 ldc1 rz2, 496(pA2) madd.d rC22, rC22, rA2, rB2 madd.d rC32, rC32, rA3, rB2 ldc1 rB2, 488(pB2) madd.d rC03, rC03, rA0, rB3 madd.d rC13, rC13, rA1, rB3 ldc1 rz3, 496(pA3) madd.d rC23, rC23, rA2, rB3 madd.d rC33, rC33, rA3, rB3 ldc1 rB3, 488(pB3) madd.d rC00, rC00, ra0, rB0 madd.d rC10, rC10, ra1, rB0 ldc1 rA0, 504(pA0) madd.d rC20, rC20, ra2, rB0 madd.d rC30, rC30, ra3, rB0 ldc1 rB0, 496(pB0) madd.d rC01, rC01, ra0, rB1 madd.d rC11, rC11, ra1, rB1 ldc1 rA1, 504(pA1) madd.d rC21, rC21, ra2, rB1 madd.d rC31, rC31, ra3, rB1 ldc1 rB1, 496(pB1) madd.d rC02, rC02, ra0, rB2 madd.d rC12, rC12, ra1, rB2 ldc1 rA2, 504(pA2) madd.d rC22, rC22, ra2, rB2 madd.d rC32, rC32, ra3, rB2 ldc1 rB2, 496(pB2) madd.d rC03, rC03, ra0, rB3 madd.d rC13, rC13, ra1, rB3 ldc1 rA3, 504(pA3) madd.d rC23, rC23, ra2, rB3 madd.d rC33, rC33, ra3, rB3 ldc1 rB3, 496(pB3) madd.d rC00, rC00, rz0, rB0 madd.d rC10, rC10, rz1, rB0 ldc1 ra0, 512(pA0) madd.d rC20, rC20, rz2, rB0 madd.d rC30, rC30, rz3, rB0 ldc1 rB0, 504(pB0) madd.d rC01, rC01, rz0, rB1 madd.d rC11, rC11, rz1, rB1 ldc1 ra1, 512(pA1) madd.d rC21, rC21, rz2, rB1 madd.d rC31, rC31, rz3, rB1 ldc1 rB1, 504(pB1) madd.d rC02, rC02, rz0, rB2 madd.d rC12, rC12, rz1, rB2 ldc1 ra2, 512(pA2) madd.d rC22, rC22, rz2, rB2 madd.d rC32, rC32, rz3, rB2 ldc1 rB2, 504(pB2) madd.d rC03, rC03, rz0, rB3 madd.d rC13, rC13, rz1, rB3 ldc1 ra3, 512(pA3) madd.d rC23, rC23, rz2, rB3 madd.d rC33, rC33, rz3, rB3 ldc1 rB3, 504(pB3)#endif#if KB > 66 madd.d rC00, rC00, rA0, rB0 madd.d rC10, rC10, rA1, rB0 ldc1 rz0, 520(pA0) madd.d rC20, rC20, rA2, rB0 madd.d rC30, rC30, rA3, rB0 ldc1 rB0, 512(pB0) madd.d rC01, rC01, rA0, rB1 madd.d rC11, rC11, rA1, rB1 ldc1 rz1, 520(pA1) madd.d rC21, rC21, rA2, rB1 madd.d rC31, rC31, rA3, rB1 ldc1 rB1, 512(pB1) madd.d rC02, rC02, rA0, rB2 madd.d rC12, rC12, rA1, rB2 ldc1 rz2, 520(pA2) madd.d rC22, rC22, rA2, rB2 madd.d rC32, rC32, rA3, rB2 ldc1 rB2, 512(pB2) madd.d rC03, rC03, rA0, rB3 madd.d rC13, rC13, rA1, rB3 ldc1 rz3, 520(pA3) madd.d rC23, rC23, rA2, rB3 madd.d rC33, rC33, rA3, rB3 ldc1 rB3, 512(pB3) madd.d rC00, rC00, ra0, rB0 madd.d rC10, rC10, ra1, rB0 ldc1 rA0, 528(pA0) madd.d rC20, rC20, ra2, rB0 madd.d rC30, rC30, ra3, rB0 ldc1 rB0, 520(pB0) madd.d rC01, rC01, ra0, rB1 madd.d rC11, rC11, ra1, rB1 ldc1 rA1, 528(pA1) madd.d rC21, rC21, ra2, rB1 madd.d rC31, rC31, ra3, rB1 ldc1 rB1, 520(pB1) madd.d rC02, rC02, ra0, rB2 madd.d rC12, rC12, ra1, rB2 ldc1 rA2, 528(pA2) madd.d rC22, rC22, ra2, rB2 madd.d rC32, rC32, ra3, rB2 ldc1 rB2, 520(pB2) madd.d rC03, rC03, ra0, rB3 madd.d rC13, rC13, ra1, rB3 ldc1 rA3, 528(pA3) madd.d rC23, rC23, ra2, rB3 madd.d rC33, rC33, ra3, rB3 ldc1 rB3, 520(pB3) madd.d rC00, rC00, rz0, rB0 madd.d rC10, rC10, rz1, rB0 ldc1 ra0, 536(pA0) madd.d rC20, rC20, rz2, rB0 madd.d rC30, rC30, rz3, rB0 ldc1 rB0, 528(pB0) madd.d rC01, rC01, rz0, rB1 madd.d rC11, rC11, rz1, rB1 ldc1 ra1, 536(pA1) madd.d rC21, rC21, rz2, rB1 madd.d rC31, rC31, rz3, rB1 ldc1 rB1, 528(pB1) madd.d rC02, rC02, rz0, rB2 madd.d rC12, rC12, rz1, rB2 ldc1 ra2, 536(pA2) madd.d rC22, rC22, rz2, rB2 madd.d rC32, rC32, rz3, rB2 ldc1 rB2, 528(pB2) madd.d rC03, rC03, rz0, rB3 madd.d rC13, rC13, rz1, rB3 ldc1 ra3, 536(pA3) madd.d rC23, rC23, rz2, rB3 madd.d rC33, rC33, rz3, rB3 ldc1 rB3, 528(pB3)#endif#if KB > 69 madd.d rC00, rC00, rA0, rB0 madd.d rC10, rC10, rA1, rB0 ldc1 rz0, 544(pA0) madd.d rC20, rC20, rA2, rB0 madd.d rC30, rC30, rA3, rB0 ldc1 rB0, 536(pB0) madd.d rC01, rC01, rA0, rB1 madd.d rC11, rC11, rA1, rB1 ldc1 rz1, 544(pA1) madd.d rC21, rC21, rA2, rB1 madd.d rC31, rC31, rA3, rB1 ldc1 rB1, 536(pB1) madd.d rC02, rC02, rA0, rB2 madd.d rC12, rC12, rA1, rB2 ldc1 rz2, 544(pA2) madd.d rC22, rC22, rA2, rB2 madd.d rC32, rC32, rA3, rB2 ldc1 rB2, 536(pB2) madd.d rC03, rC03, rA0, rB3 madd.d rC13, rC13, rA1, rB3 ldc1 rz3, 544(pA3) madd.d rC23, rC23, rA2, rB3 madd.d rC33, rC33, rA3, rB3 ldc1 rB3, 536(pB3) madd.d rC00, rC00, ra0, rB0 madd.d rC10, rC10, ra1, rB0 ldc1 rA0, 552(pA0) madd.d rC20, rC20, ra2, rB0 madd.d rC30, rC30, ra3, rB0 ldc1 rB0, 544(pB0) madd.d rC01, rC01, ra0, rB1 madd.d rC11, rC11, ra1, rB1 ldc1 rA1, 552(pA1) madd.d rC21, rC21, ra2, rB1 madd.d rC31, rC31, ra3, rB1 ldc1 rB1, 544(pB1) madd.d rC02, rC02, ra0, rB2 madd.d rC12, rC12, ra1, rB2 ldc1 rA2, 552(pA2) madd.d rC22, rC22, ra2, rB2 madd.d rC32, rC32, ra3, rB2 ldc1 rB2, 544(pB2) madd.d rC03, rC03, ra0, rB3 madd.d rC13, rC13, ra1, rB3 ldc1 rA3, 552(pA3) madd.d rC23, rC23, ra2, rB3 madd.d rC33, rC33, ra3, rB3 ldc1 rB3, 544(pB3) madd.d rC00, rC00, rz0, rB0 madd.d rC10, rC10, rz1, rB0 ldc1 ra0, 560(pA0) madd.d rC20, rC20, rz2, rB0 madd.d rC30, rC30, rz3, rB0 ldc1 rB0, 552(pB0) madd.d rC01, rC01, rz0, rB1 madd.d rC11, rC11, rz1, rB1 ldc1 ra1, 560(pA1) madd.d rC21, rC21, rz2, rB1 madd.d rC31, rC31, rz3, rB1 ldc1 rB1, 552(pB1) madd.d rC02, rC02, rz0, rB2 madd.d rC12, rC12, rz1, rB2 ldc1 ra2, 560(pA2) madd.d rC22, rC22, rz2, rB2 madd.d rC32, rC32, rz3, rB2 ldc1 rB2, 552(pB2) madd.d rC03, rC03, rz0, rB3 madd.d rC13, rC13, rz1, rB3 ldc1 ra3, 560(pA3) madd.d rC23, rC23, rz2, rB3 madd.d rC33, rC33, rz3, rB3 ldc1 rB3, 552(pB3)#endif#if KB > 72 madd.d rC00, rC00, rA0, rB0 madd.d rC10, rC10, rA1, rB0 ldc1 rz0, 568(pA0) madd.d rC20, rC20, rA2, rB0 madd.d rC30, rC30, rA3, rB0 ldc1 rB0, 560(pB0) madd.d rC01, rC01, rA0, rB1 madd.d rC11, rC11, rA1, rB1 ldc1 rz1, 568(pA1) madd.d rC21, rC21, rA2, rB1 madd.d rC31, rC31, rA3, rB1 ldc1 rB1, 560(pB1) madd.d rC02, rC02, rA0, rB2 madd.d rC12, rC12, rA1, rB2 ldc1 rz2, 568(pA2) madd.d rC22, rC22, rA2, rB2 madd.d rC32, rC32, rA3, rB2 ldc1 rB2, 560(pB2) madd.d rC03, rC03, rA0, rB3 madd.d rC13, rC13, rA1, rB3 ldc1 rz3, 568(pA3) madd.d rC23, rC23, rA2, rB3 madd.d rC33, rC33, rA3, rB3 ldc1 rB3, 560(pB3) madd.d rC00, rC00, ra0, rB0 madd.d rC10, rC10, ra1, rB0 ldc1 rA0, 576(pA0) madd.d rC20, rC20, ra2, rB0 madd.d rC30, rC30, ra3, rB0 ldc1 rB0, 568(pB0) madd.d rC01, rC01, ra0, rB1 madd.d rC11, rC11, ra1, rB1 ldc1 rA1, 576(pA1) madd.d rC21, rC21, ra2, rB1 madd.d rC31, rC31, ra3, rB1 ldc1 rB1, 568(pB1) madd.d rC02, rC02, ra0, rB2 madd.d rC12, rC12, ra1, rB2 ldc1 rA2, 576(pA2) madd.d rC22, rC22, ra2, rB2 madd.d rC32, rC32, ra3, rB2 ldc1 rB2, 568(pB2) madd.d rC03, rC03, ra0, rB3 madd.d rC13, rC13, ra1, rB3 ldc1 rA3, 576(pA3) madd.d rC23, rC23, ra2, rB3 madd.d rC33, rC33, ra3, rB3 ldc1 rB3, 568(pB3) madd.d rC00, rC00, rz0, rB0 madd.d rC10, rC10, rz1, rB0 ldc1 ra0, 584(pA0) madd.d rC20, rC20, rz2, rB0 madd.d rC30, rC30, rz3, rB0 ldc1 rB0, 576(pB0) madd.d rC01, rC01, rz0, rB1 madd.d rC11, rC11, rz1, rB1 ldc1 ra1, 584(pA1) madd.d rC21, rC21, rz2, rB1 madd.d rC31, rC31, rz3, rB1 ldc1 rB1, 576(pB1) madd.d rC02, rC02, rz0, rB2 madd.d rC12, rC12, rz1, rB2 ldc1 ra2, 584(pA2) madd.d rC22, rC22, rz2, rB2 madd.d rC32, rC32, rz3, rB2 ldc1 rB2, 576(pB2) madd.d rC03, rC03, rz0, rB3 madd.d rC13, rC13, rz1, rB3 ldc1 ra3, 584(pA3) madd.d rC23, rC23, rz2, rB3 madd.d rC33, rC33, rz3, rB3 ldc1 rB3, 576(pB3)#endif/* daddiu K, K, 8 bne K, K0, KLOOP madd.d rC33, rC33, ra3, rb3 /* /* in delay slot! *//* * Drain ld/use pipe */ madd.d rC00, rC00, rA0, rB0 daddiu pC0, pC0, CMUL(32) madd.d rC10, rC10, rA1, rB0 ldc1 rz0, KB*8-8(pA0) madd.d rC20, rC20, rA2, rB0 daddu pA0, pA0, incAm madd.d rC30, rC30, rA3, rB0 ldc1 rB0, KB*8-16(pB0) madd.d rC01, rC01, rA0, rB1 daddiu pC1, pC1, CMUL(32) madd.d rC11, rC11, rA1, rB1 ldc1 rz1, KB*8-8(pA1) madd.d rC21, rC21, rA2, rB1 daddu pA1, pA1, incAm madd.d rC31, rC31, rA3, rB1 ldc1 rB1, KB*8-16(pB1) madd.d rC02, rC02, rA0, rB2 daddiu pC2, pC2, CMUL(32) madd.d rC12, rC12, rA1, rB2 ldc1 rz2, KB*8-8(pA2) madd.d rC22, rC22, rA2, rB2 daddu pA2, pA2, incAm madd.d rC32, rC32, rA3, rB2 ldc1 rB2, KB*8-16(pB2) madd.d rC03, rC03, rA0, rB3 daddiu pC3, pC3, CMUL(32) madd.d rC13, rC13, rA1, rB3 ldc1 rz3, KB*8-8(pA3) madd.d rC23, rC23, rA2, rB3 daddu pA3, pA3, incAm madd.d rC33, rC33, rA3, rB3 ldc1 rB3, KB*8-16(pB3) madd.d rC00, rC00, ra0, rB0 madd.d rC10, rC10, ra1, rB0 madd.d rC20, rC20, ra2, rB0 madd.d rC30, rC30, ra3, rB0 ldc1 rB0, KB*8-8(pB0) madd.d rC01, rC01, ra0, rB1 madd.d rC11, rC11, ra1, rB1 madd.d rC21, rC21, ra2, rB1 madd.d rC31, rC31, ra3, rB1 ldc1 rB1, KB*8-8(pB1) madd.d rC02, rC02, ra0, rB2 madd.d rC12, rC12, ra1, rB2 madd.d rC22, rC22, ra2, rB2 madd.d rC32, rC32, ra3, rB2 ldc1 rB2, KB*8-8(pB2) madd.d rC03, rC03, ra0, rB3 madd.d rC13, rC13, ra1, rB3 madd.d rC23, rC23, ra2, rB3 madd.d rC33, rC33, ra3, rB3 ldc1 rB3, KB*8-8(pB3) madd.d rC00, rC00, rz0, rB0 madd.d rC10, rC10, rz1, rB0 madd.d rC20, rC20, rz2, rB0 madd.d rC30, rC30, rz3, rB0 madd.d rC01, rC01, rz0, rB1 sdc1 rC00, -CMUL(32)(pC0) madd.d rC11, rC11, rz1, rB1 sdc1 rC10, -CMUL(24)(pC0) madd.d rC21, rC21, rz2, rB1 sdc1 rC20, -CMUL(16)(pC0) madd.d rC31, rC31, rz3, rB1 sdc1 rC30, -CMUL(8)(pC0) madd.d rC02, rC02, rz0, rB2 sdc1 rC01, -CMUL(32)(pC1) madd.d rC12, rC12, rz1, rB2 sdc1 rC11, -CMUL(24)(pC1) madd.d rC22, rC22, rz2, rB2 sdc1 rC21, -CMUL(16)(pC1) madd.d rC32, rC32, rz3, rB2 sdc1 rC31, -CMUL(8)(pC1) madd.d rC03, rC03, rz0, rB3 sdc1 rC02, -CMUL(32)(pC2) madd.d rC13, rC13, rz1, rB3 sdc1 rC12, -CMUL(24)(pC2) madd.d rC23, rC23, rz2, rB3 sdc1 rC22, -CMUL(16)(pC2) madd.d rC33, rC33, rz3, rB3 sdc1 rC32, -CMUL(8)(pC2) sdc1 rC03, -CMUL(32)(pC3) sdc1 rC13, -CMUL(24)(pC3) sdc1 rC23, -CMUL(16)(pC3) bne pA0, stAm, MLOOP sdc1 rC33, -CMUL(8)(pC3) dsubu pA0, pA0, incAn dsubu pA1, pA1, incAn dsubu pA2, pA2, incAn dsubu pA3, pA3, incAn daddu pC0, pC0, incCn daddu pC1, pC1, incCn daddu pC2, pC2, incCn daddu pC3, pC3, incCn daddu pB0, pB0, incBn daddu pB1, pB1, incBn daddu pB2, pB2, incBn bne pB0, stBn, NLOOP daddu pB3, pB3, incBn /* delay slot! */DONE:/* * Epilogue: restore registers and return */ ld $16, 0($sp) ld $17, 8($sp) ld $18, 16($sp) ld $19, 24($sp) ld $20, 32($sp) ld $21, 40($sp) ld $22, 64($sp) ld $23, 72($sp) ld $30, 80($sp)#ifdef ATL_USE64BITS ldc1 $f24, 88($sp) ldc1 $f25, 96($sp) ldc1 $f26, 104($sp) ldc1 $f27, 112($sp) ldc1 $f28, 120($sp) ldc1 $f29, 128($sp) ldc1 $f30, 136($sp) ldc1 $f31, 144($sp)#else ldc1 $f20, 88($sp) ldc1 $f22, 96($sp) ldc1 $f24, 104($sp) ldc1 $f26, 112($sp) ldc1 $f28, 120($sp) ldc1 $f30, 128($sp)#endif j $31 daddiu $sp, $sp, FSIZE /* delay slot of return statement *//* end of file MIPS assembler BS */ .set macro .set reorder .set at#ifndef ATL_OS_IRIX .size ATL_USERMM,.-ATL_USERMM#endif .end ATL_USERMM
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?