📄 atl_dmm4x1x90_x87.c
字号:
/*
 * Fully unrolled K steps of an x87 kernel that accumulates four products
 * per step: one double from pB0 is multiplied by four doubles taken from
 * pA0, pA0+lda, pA0+2*lda and pA0+lda3, and each product is added into its
 * own accumulator (rC0..rC3) held on the x87 register stack.
 *
 * Step k uses byte offset 8*k-128 for both pA0 and pB0.  The unguarded
 * first step (k = 0) creates the accumulators; every later step is compiled
 * only when the preprocessor constant KB is greater than k.
 *
 * Between steps the x87 stack holds, top first:  rC3, rC1, rC0, rC2
 * (note the order).  Inside a step rB0 sits on top of them, so the
 * accumulators are reached as st(4)=rC0, st(3)=rC1, st(5)=rC2 while a
 * product is in st(0), and st(1)=rC3 once rB0 has been overwritten by the
 * last product.  A trailing "+" in a stack comment marks an accumulator
 * that has already been updated in the current step.
 *
 * NOTE(review): pA0, pB0, pC0, lda, lda3 and KB are defined outside this
 * block.  The constant -128 in every displacement presumably matches a
 * +128 byte bias applied to pA0/pB0 during setup so that displacements fit
 * in a signed byte -- confirm against the pointer setup.  lda3 is
 * presumably 3*lda (the stack comments call its operand rA3) -- confirm.
 */
        prefetchw (pC0)                 /* prefetch the line at pC0 in anticipation of a write */
        fldl 0-128(pB0)                 /* st = rB0 */
        fldl 0-128(pA0)                 /* st = rA0, rB0 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0              (product becomes rC0) */
        fldl 0-128(pA0,lda)             /* st = rA1, rC0, rB0 */
        fmul %st(2), %st                /* st = rA1*rB0, rC0, rB0         (product becomes rC1) */
        fldl 0-128(pA0,lda,2)           /* st = rA2, rC1, rC0, rB0 */
        fmul %st(3), %st                /* st = rA2*rB0, rC1, rC0, rB0    (product becomes rC2) */
        fxch %st(3)                     /* st = rB0, rC1, rC0, rC2        (bring rB0 to the top for the memory-operand multiply) */
        fmull 0-128(pA0,lda3)           /* st = rA3*rB0, rC1, rC0, rC2    (rB0 is consumed; product becomes rC3) */
/*KLOOP: */
                                        /* st = rC3, rC1, rC0, rC2 */
#if KB > 1
        fldl 8-128(pB0)                 /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 8-128(pA0)                 /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 8-128(pA0,lda)             /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 8-128(pA0,lda,2)           /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 8-128(pA0,lda3)           /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 2
        fldl 16-128(pB0)                /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 16-128(pA0)                /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 16-128(pA0,lda)            /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 16-128(pA0,lda,2)          /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 16-128(pA0,lda3)          /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 3
        fldl 24-128(pB0)                /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 24-128(pA0)                /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 24-128(pA0,lda)            /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 24-128(pA0,lda,2)          /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 24-128(pA0,lda3)          /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 4
        fldl 32-128(pB0)                /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 32-128(pA0)                /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 32-128(pA0,lda)            /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 32-128(pA0,lda,2)          /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 32-128(pA0,lda3)          /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 5
        fldl 40-128(pB0)                /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 40-128(pA0)                /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 40-128(pA0,lda)            /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 40-128(pA0,lda,2)          /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 40-128(pA0,lda3)          /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 6
        fldl 48-128(pB0)                /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 48-128(pA0)                /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 48-128(pA0,lda)            /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 48-128(pA0,lda,2)          /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 48-128(pA0,lda3)          /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 7
        fldl 56-128(pB0)                /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 56-128(pA0)                /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 56-128(pA0,lda)            /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 56-128(pA0,lda,2)          /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 56-128(pA0,lda3)          /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 8
        fldl 64-128(pB0)                /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 64-128(pA0)                /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 64-128(pA0,lda)            /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 64-128(pA0,lda,2)          /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 64-128(pA0,lda3)          /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 9
        fldl 72-128(pB0)                /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 72-128(pA0)                /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 72-128(pA0,lda)            /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 72-128(pA0,lda,2)          /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 72-128(pA0,lda3)          /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 10
        fldl 80-128(pB0)                /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 80-128(pA0)                /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 80-128(pA0,lda)            /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 80-128(pA0,lda,2)          /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 80-128(pA0,lda3)          /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 11
        fldl 88-128(pB0)                /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 88-128(pA0)                /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 88-128(pA0,lda)            /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 88-128(pA0,lda,2)          /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 88-128(pA0,lda3)          /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 12
        fldl 96-128(pB0)                /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 96-128(pA0)                /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 96-128(pA0,lda)            /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 96-128(pA0,lda,2)          /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 96-128(pA0,lda3)          /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 13
        fldl 104-128(pB0)               /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 104-128(pA0)               /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 104-128(pA0,lda)           /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 104-128(pA0,lda,2)         /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 104-128(pA0,lda3)         /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 14
        fldl 112-128(pB0)               /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 112-128(pA0)               /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 112-128(pA0,lda)           /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 112-128(pA0,lda,2)         /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 112-128(pA0,lda3)         /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 15
        fldl 120-128(pB0)               /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 120-128(pA0)               /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 120-128(pA0,lda)           /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 120-128(pA0,lda,2)         /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 120-128(pA0,lda3)         /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 16
        fldl 128-128(pB0)               /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 128-128(pA0)               /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 128-128(pA0,lda)           /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 128-128(pA0,lda,2)         /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 128-128(pA0,lda3)         /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 17
        fldl 136-128(pB0)               /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 136-128(pA0)               /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 136-128(pA0,lda)           /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 136-128(pA0,lda,2)         /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 136-128(pA0,lda3)         /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 18
        fldl 144-128(pB0)               /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 144-128(pA0)               /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
        fmul %st(1), %st                /* st = rA0*rB0, rB0, rC3, rC1, rC0, rC2 */
        faddp %st, %st(4)               /* st = rB0, rC3, rC1, rC0+, rC2 */
        fldl 144-128(pA0,lda)           /* st = rA1, rB0, rC3, rC1, rC0+, rC2 */
        fmul %st(1),%st                 /* st = rA1*rB0, rB0, rC3, rC1, rC0+, rC2 */
        faddp %st, %st(3)               /* st = rB0, rC3, rC1+, rC0+, rC2 */
        fldl 144-128(pA0,lda,2)         /* st = rA2, rB0, rC3, rC1+, rC0+, rC2 */
        fmul %st(1), %st                /* st = rA2*rB0, rB0, rC3, rC1+, rC0+, rC2 */
        faddp %st, %st(5)               /* st = rB0, rC3, rC1+, rC0+, rC2+ */
        fmull 144-128(pA0,lda3)         /* st = rA3*rB0, rC3, rC1+, rC0+, rC2+ */
        faddp %st, %st(1)               /* st = rC3+, rC1+, rC0+, rC2+ */
#endif
#if KB > 19
        fldl 152-128(pB0)               /* st = rB0, rC3, rC1, rC0, rC2 */
        fldl 152-128(pA0)               /* st = rA0, rB0, rC3, rC1, rC0, rC2 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -