atl_mm8x8x2.c

来自「基于 BLAS、CLAPACK 的,用过的人知道是干啥的」· C语言 代码 · 共 529 行 · 第 1/2 页

C
529
字号
            rC2_2 += ra2 * rb2;            rC3_2 += ra3 * rb2;            rC4_2 += ra4 * rb2;            rC5_2 += ra5 * rb2;            rC6_2 += ra6 * rb2;            rC7_2 += ra7 * rb2;            rC0_3 += ra0 * rb3;            rC1_3 += ra1 * rb3;            rC2_3 += ra2 * rb3;            rC3_3 += ra3 * rb3;            rC4_3 += ra4 * rb3;            rC5_3 += ra5 * rb3;            rC6_3 += ra6 * rb3;            rC7_3 += ra7 * rb3;            rC0_4 += ra0 * rb4;            rC1_4 += ra1 * rb4;            rC2_4 += ra2 * rb4;            rC3_4 += ra3 * rb4;            rC4_4 += ra4 * rb4;            rC5_4 += ra5 * rb4;            rC6_4 += ra6 * rb4;            rC7_4 += ra7 * rb4;            rC0_5 += ra0 * rb5;            rC1_5 += ra1 * rb5;            rC2_5 += ra2 * rb5;            rC3_5 += ra3 * rb5;            rC4_5 += ra4 * rb5;            rC5_5 += ra5 * rb5;            rC6_5 += ra6 * rb5;            rC7_5 += ra7 * rb5;            rC0_6 += ra0 * rb6;            rC1_6 += ra1 * rb6;            rC2_6 += ra2 * rb6;            rC3_6 += ra3 * rb6;            rC4_6 += ra4 * rb6;            rC5_6 += ra5 * rb6;            rC6_6 += ra6 * rb6;            rC7_6 += ra7 * rb6;            rC0_7 += ra0 * rb7;            rC1_7 += ra1 * rb7;            rC2_7 += ra2 * rb7;            rC3_7 += ra3 * rb7;            rC4_7 += ra4 * rb7;            rC5_7 += ra5 * rb7;            rC6_7 += ra6 * rb7;            rC7_7 += ra7 * rb7;         }                                ATL_pfl1R(pfA);         rC0_0 += rA0 * rB0;                                ATL_pfl1R(pfA+16);         rC1_0 += rA1 * rB0;                                ATL_pfl1R(pfA+32);         rC2_0 += rA2 * rB0;                                ATL_pfl1R(pfA+48);         rC3_0 += rA3 * rB0;                                pfA += 64;         rC4_0 += rA4 * rB0;         rC5_0 += rA5 * rB0;         rC6_0 += rA6 * rB0;         rC7_0 += rA7 * rB0;         rC0_1 += rA0 * rB1;                                   ra0 = *pA0++;         rC1_1 += rA1 * 
rB1;                                   ra1 = *pA1++;         rC2_1 += rA2 * rB1;                                   ra2 = *pA2++;         rC3_1 += rA3 * rB1;                                   ra3 = *pA3++;         rC4_1 += rA4 * rB1;                                   ra4 = *pA4++;         rC5_1 += rA5 * rB1;                                   ra5 = *pA5++;         rC6_1 += rA6 * rB1;                                   ra6 = *pA6++;         rC7_1 += rA7 * rB1;                                   ra7 = *pA7++;         rC0_2 += rA0 * rB2;                                   rb0 = *pB0++;         rC1_2 += rA1 * rB2;                                   rb1 = *pB1++;         rC2_2 += rA2 * rB2;                                   rb2 = *pB2++;         rC3_2 += rA3 * rB2;                                   rb3 = *pB3++;         rC4_2 += rA4 * rB2;                                   rb4 = *pB4++;         rC5_2 += rA5 * rB2;                                   rb5 = *pB5++;         rC6_2 += rA6 * rB2;                                   rb6 = *pB6++;         rC7_2 += rA7 * rB2;                                   rb7 = *pB7++;         rC0_3 += rA0 * rB3;         rC1_3 += rA1 * rB3;                         pA0 += incAm;         rC2_3 += rA2 * rB3;                         pA1 += incAm;         rC3_3 += rA3 * rB3;                         pA2 += incAm;         rC4_3 += rA4 * rB3;                         pA3 += incAm;         rC5_3 += rA5 * rB3;                         pA4 += incAm;         rC6_3 += rA6 * rB3;                         pA5 += incAm;         rC7_3 += rA7 * rB3;                         pA6 += incAm;         rC0_4 += rA0 * rB4;                         pA7 += incAm;         rC1_4 += rA1 * rB4;                         pB0 += incBm;         rC2_4 += rA2 * rB4;                         pB1 += incBm;         rC3_4 += rA3 * rB4;                         pB2 += incBm;         rC4_4 += rA4 * rB4;                         pB3 += incBm;         rC5_4 += rA5 * rB4;                         pB4 += 
incBm;         rC6_4 += rA6 * rB4;                         pB5 += incBm;         rC7_4 += rA7 * rB4;                         pB6 += incBm;         rC0_5 += rA0 * rB5;                         pB7 += incBm;         rC1_5 += rA1 * rB5;         rC2_5 += rA2 * rB5;         rC3_5 += rA3 * rB5;         rC4_5 += rA4 * rB5;         rC5_5 += rA5 * rB5;         rC6_5 += rA6 * rB5;         rC7_5 += rA7 * rB5;         rC0_6 += rA0 * rB6;         rC1_6 += rA1 * rB6;         rC2_6 += rA2 * rB6;         rC3_6 += rA3 * rB6;         rC4_6 += rA4 * rB6;         rC5_6 += rA5 * rB6;         rC6_6 += rA6 * rB6;         rC7_6 += rA7 * rB6;         rC0_7 += rA0 * rB7;         rC1_7 += rA1 * rB7;         rC2_7 += rA2 * rB7;         rC3_7 += rA3 * rB7;         rC4_7 += rA4 * rB7;         rC5_7 += rA5 * rB7;         rC6_7 += rA6 * rB7;         rC7_7 += rA7 * rB7;         rC0_0 += ra0 * rb0;         rC1_0 += ra1 * rb0;         rC2_0 += ra2 * rb0;         rC3_0 += ra3 * rb0;         rC4_0 += ra4 * rb0;         rC5_0 += ra5 * rb0;         rC6_0 += ra6 * rb0;         rC7_0 += ra7 * rb0;         rC0_1 += ra0 * rb1;         rC1_1 += ra1 * rb1;         rC2_1 += ra2 * rb1;         rC3_1 += ra3 * rb1;         rC4_1 += ra4 * rb1;         rC5_1 += ra5 * rb1;         rC6_1 += ra6 * rb1;         rC7_1 += ra7 * rb1;         rC0_2 += ra0 * rb2;         rC1_2 += ra1 * rb2;         rC2_2 += ra2 * rb2;         rC3_2 += ra3 * rb2;         rC4_2 += ra4 * rb2;         rC5_2 += ra5 * rb2;         rC6_2 += ra6 * rb2;         rC7_2 += ra7 * rb2;         rC0_3 += ra0 * rb3;         rC1_3 += ra1 * rb3;         rC2_3 += ra2 * rb3;         rC3_3 += ra3 * rb3;         rC4_3 += ra4 * rb3;         rC5_3 += ra5 * rb3;         rC6_3 += ra6 * rb3;         rC7_3 += ra7 * rb3;         rC0_4 += ra0 * rb4;         rC1_4 += ra1 * rb4;         rC2_4 += ra2 * rb4;         rC3_4 += ra3 * rb4;         rC4_4 += ra4 * rb4;         rC5_4 += ra5 * rb4;         rC6_4 += ra6 * rb4;         rC7_4 += ra7 * rb4;         rC0_5 += ra0 * rb5;    
     rC1_5 += ra1 * rb5;         rC2_5 += ra2 * rb5;         rC3_5 += ra3 * rb5;         rC4_5 += ra4 * rb5;         rC5_5 += ra5 * rb5;         rC6_5 += ra6 * rb5;         rC7_5 += ra7 * rb5;         rC0_6 += ra0 * rb6;         rC1_6 += ra1 * rb6;         rC2_6 += ra2 * rb6;         rC3_6 += ra3 * rb6;         rC4_6 += ra4 * rb6;         rC5_6 += ra5 * rb6;         rC6_6 += ra6 * rb6;         rC7_6 += ra7 * rb6;         rC0_7 += ra0 * rb7;         rC1_7 += ra1 * rb7;         rC2_7 += ra2 * rb7;         rC3_7 += ra3 * rb7;         rC4_7 += ra4 * rb7;         rC5_7 += ra5 * rb7;         rC6_7 += ra6 * rb7;         rC7_7 += ra7 * rb7;         *pC0   = rC0_0; pC0[1 SHIFT] = rC1_0;         pC0[2 SHIFT] = rC2_0; pC0[3 SHIFT] = rC3_0;         pC0[4 SHIFT] = rC4_0; pC0[5 SHIFT] = rC5_0;         pC0[6 SHIFT] = rC6_0; pC0[7 SHIFT] = rC7_0;         *pC1   = rC0_1; pC1[1 SHIFT] = rC1_1;         pC1[2 SHIFT] = rC2_1; pC1[3 SHIFT] = rC3_1;         pC1[4 SHIFT] = rC4_1; pC1[5 SHIFT] = rC5_1;         pC1[6 SHIFT] = rC6_1; pC1[7 SHIFT] = rC7_1;         *pC2   = rC0_2; pC2[1 SHIFT] = rC1_2;         pC2[2 SHIFT] = rC2_2; pC2[3 SHIFT] = rC3_2;         pC2[4 SHIFT] = rC4_2; pC2[5 SHIFT] = rC5_2;         pC2[6 SHIFT] = rC6_2; pC2[7 SHIFT] = rC7_2;         *pC3   = rC0_3; pC3[1 SHIFT] = rC1_3;         pC3[2 SHIFT] = rC2_3; pC3[3 SHIFT] = rC3_3;         pC3[4 SHIFT] = rC4_3; pC3[5 SHIFT] = rC5_3;         pC3[6 SHIFT] = rC6_3; pC3[7 SHIFT] = rC7_3;         *pC4   = rC0_4; pC4[1 SHIFT] = rC1_4;         pC4[2 SHIFT] = rC2_4; pC4[3 SHIFT] = rC3_4;         pC4[4 SHIFT] = rC4_4; pC4[5 SHIFT] = rC5_4;         pC4[6 SHIFT] = rC6_4; pC4[7 SHIFT] = rC7_4;         *pC5   = rC0_5; pC5[1 SHIFT] = rC1_5;         pC5[2 SHIFT] = rC2_5; pC5[3 SHIFT] = rC3_5;         pC5[4 SHIFT] = rC4_5; pC5[5 SHIFT] = rC5_5;         pC5[6 SHIFT] = rC6_5; pC5[7 SHIFT] = rC7_5;         *pC6   = rC0_6; pC6[1 SHIFT] = rC1_6;         pC6[2 SHIFT] = rC2_6; pC6[3 SHIFT] = rC3_6;         pC6[4 SHIFT] = rC4_6; pC6[5 SHIFT] = 
rC5_6;         pC6[6 SHIFT] = rC6_6; pC6[7 SHIFT] = rC7_6;         *pC7   = rC0_7; pC7[1 SHIFT] = rC1_7;         pC7[2 SHIFT] = rC2_7; pC7[3 SHIFT] = rC3_7;         pC7[4 SHIFT] = rC4_7; pC7[5 SHIFT] = rC5_7;         pC7[6 SHIFT] = rC6_7; pC7[7 SHIFT] = rC7_7;         pC0 += incCm; pC1 += incCm; pC2 += incCm; pC3 += incCm;         pC4 += incCm; pC5 += incCm; pC6 += incCm; pC7 += incCm;      }      while(pA0 != stM);      pC0 += incCn; pC1 += incCn; pC2 += incCn; pC3 += incCn;      pC4 += incCn; pC5 += incCn; pC6 += incCn; pC7 += incCn;      pA0 += incAn; pA1 += incAn; pA2 += incAn; pA3 += incAn;      pA4 += incAn; pA5 += incAn; pA6 += incAn; pA7 += incAn;      pB0 += incBn; pB1 += incBn; pB2 += incBn; pB3 += incBn;      pB4 += incBn; pB5 += incBn; pB6 += incBn; pB7 += incBn;   }   while(pB0 != stN);}#ifndef TREAL   #undef ldc2#endif

⌨️ 快捷键说明

复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?