📄 gemm_kernel_hummer.s
字号:
LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX A4, AO, INC2 LFPDUX B4, BO, INC2 LFPDUX A5, AO, INC2 LFPDUX B5, BO, INC2 LFPDUX A6, AO, INC2 LFPDUX B6, BO, INC2 LFPDUX A7, AO, INC2 LFPDUX A9, BO, INC2 LFPDUX A8, AO, INC2 LFPDUX A10, BO, INC2 bdz- .L73 .align 4.L72: fxcpmadd f0, B1, A1, f0 fxcsmadd f1, B1, A1, f1 LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 fxcpmadd f2, B2, A2, f2 fxcsmadd f3, B2, A2, f3 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 fxcpmadd f0, B3, A3, f0 fxcsmadd f1, B3, A3, f1 LFPDUX A3, AO, INC2 LFPDUX B3, BO, INC2 fxcpmadd f2, B4, A4, f2 fxcsmadd f3, B4, A4, f3 LFPDUX A4, AO, INC2 LFPDUX B4, BO, INC2 fxcpmadd f0, B5, A5, f0 fxcsmadd f1, B5, A5, f1 LFPDUX A5, AO, INC2 LFPDUX B5, BO, INC2 fxcpmadd f2, B6, A6, f2 fxcsmadd f3, B6, A6, f3 LFPDUX A6, AO, INC2 LFPDUX B6, BO, INC2 fxcpmadd f0, A9, A7, f0 fxcsmadd f1, A9, A7, f1 LFPDUX A7, AO, INC2 LFPDUX A9, BO, INC2 fxcpmadd f2, A10, A8, f2 fxcsmadd f3, A10, A8, f3 LFPDUX A8, AO, INC2 LFPDUX A10, BO, INC2 bdnz+ .L72 .align 4.L73: fxcpmadd f0, B1, A1, f0 fxcsmadd f1, B1, A1, f1 fxcpmadd f2, B2, A2, f2 fxcsmadd f3, B2, A2, f3 fxcpmadd f0, B3, A3, f0 fxcsmadd f1, B3, A3, f1 fxcpmadd f2, B4, A4, f2 fxcsmadd f3, B4, A4, f3 fxcpmadd f0, B5, A5, f0 fxcsmadd f1, B5, A5, f1 fxcpmadd f2, B6, A6, f2 fxcsmadd f3, B6, A6, f3 fxcpmadd f0, A9, A7, f0 fxcsmadd f1, A9, A7, f1 fxcpmadd f2, A10, A8, f2 fxcsmadd f3, A10, A8, f3 .align 4.L74: lfd AP, ALPHA(SP)#ifdef TRMMKERNEL fsmfp AP, AP#endif#if defined(TRMMKERNEL)#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) sub TEMP, K, KK#elif defined(LEFT) addi TEMP, KK, 2#else addi TEMP, KK, 2#endif andi. TEMP, TEMP, 7 mtspr CTR, TEMP#else andi. r0, K, 7 mtspr CTR, r0#endif ble+ .L78 LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 bdz- .L77 .align 4.L76: fxcpmadd f0, B1, A1, f0 fxcsmadd f1, B1, A1, f1 LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 bdnz+ .L76 .align 4.L77: fxcpmadd f0, B1, A1, f0 fxcsmadd f1, B1, A1, f1 .align 4.L78:#ifndef TRMMKERNEL LFPDX A1, CO1, INC2 LFPDX B3, CO2, INC2 fpadd f0, f0, f2 fpadd f1, f1, f3 fxcpmadd f0, AP, f0, A1 fxcpmadd f1, AP, f1, B3#else fpadd f0, f0, f2 fpadd f1, f1, f3 fpmul f0, AP, f0 fpmul f1, AP, f1#endif STFPDUX f0, CO1, INC2 STFPDUX f1, CO2, INC2#ifdef TRMMKERNEL#if ( defined(LEFT) && defined(TRANSA)) || \ (!defined(LEFT) && !defined(TRANSA)) sub TEMP, K, KK#ifdef LEFT addi TEMP, TEMP, -2#else addi TEMP, TEMP, -2#endif slwi r0, TEMP, 1 + BASE_SHIFT slwi TEMP, TEMP, 1 + BASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LEFT addi KK, KK, 2#endif#endif li r0, FZERO lfpsx f0, SP, r0 .align 4.L80: andi. I, M, 1 beq .L89#if defined(TRMMKERNEL)#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) addi BO, B, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0#else slwi TEMP, KK, 0 + BASE_SHIFT slwi r0, KK, 1 + BASE_SHIFT add AO, AO, TEMP add BO, B, r0 addi BO, BO, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0#endif#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) sub TEMP, K, KK#elif defined(LEFT) addi TEMP, KK, 1#else addi TEMP, KK, 2#endif srawi. r0, TEMP, 3 mtspr CTR, r0 ble .L84#else addi BO, B, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, K, 3 mtspr CTR, r0 ble .L84#endif LFPDUX B1, BO, INC2 LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX B4, BO, INC2 bdz- .L83 .align 4.L82: fxcpmadd f0, A1, B1, f0 LFPDUX B1, BO, INC2 fxcsmadd f1, A1, B2, f1 LFPDUX B2, BO, INC2 LFPDUX A1, AO, INC2 fxcpmadd f2, A2, B3, f2 LFPDUX B3, BO, INC2 fxcsmadd f3, A2, B4, f3 LFPDUX B4, BO, INC2 LFPDUX A2, AO, INC2 fxcpmadd f0, A3, B1, f0 LFPDUX B1, BO, INC2 fxcsmadd f1, A3, B2, f1 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 fxcpmadd f2, A4, B3, f2 LFPDUX B3, BO, INC2 fxcsmadd f3, A4, B4, f3 LFPDUX B4, BO, INC2 LFPDUX A4, AO, INC2 bdnz+ .L82 .align 4.L83: fxcpmadd f0, A1, B1, f0 LFPDUX B1, BO, INC2 fxcsmadd f1, A1, B2, f1 LFPDUX B2, BO, INC2 fxcpmadd f2, A2, B3, f2 LFPDUX B3, BO, INC2 fxcsmadd f3, A2, B4, f3 LFPDUX B4, BO, INC2 fxcpmadd f0, A3, B1, f0 fxcsmadd f1, A3, B2, f1 fxcpmadd f2, A4, B3, f2 fxcsmadd f3, A4, B4, f3 .align 4.L84: lfd AP, ALPHA(SP)#ifdef TRMMKERNEL fsmfp AP, AP#endif#if defined(TRMMKERNEL)#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) sub TEMP, K, KK#elif defined(LEFT) addi TEMP, KK, 1#else addi TEMP, KK, 2#endif andi. TEMP, TEMP, 7 mtspr CTR, TEMP#else andi. r0, K, 7 mtspr CTR, r0#endif ble+ .L88 LFDX A1, AO, INC2 LFPDUX B1, BO, INC2 add AO, AO, INC bdz- .L87 .align 4.L86: fxcpmadd f0, A1, B1, f0 LFDX A1, AO, INC2 LFPDUX B1, BO, INC2 add AO, AO, INC bdnz+ .L86 .align 4.L87: fxcpmadd f0, A1, B1, f0 .align 4.L88:#ifndef TRMMKERNEL LFDX A1, CO1, INC2 LFDX A2, CO2, INC2 fpadd f0, f0, f1 fpadd f2, f2, f3 fsmfp A1, A2 fpadd f0, f0, f2 fxcpmadd f0, AP, f0, A1#else fpadd f0, f0, f1 fpadd f2, f2, f3 fsmfp A1, A2 fpadd f0, f0, f2 fpmul f0, AP, f0#endif STFDX f0, CO1, INC2 STFSDX f0, CO2, INC2#ifdef TRMMKERNEL#if ( defined(LEFT) && defined(TRANSA)) || \ (!defined(LEFT) && !defined(TRANSA)) sub TEMP, K, KK#ifdef LEFT addi TEMP, TEMP, -1#else addi TEMP, TEMP, -2#endif slwi r0, TEMP, 0 + BASE_SHIFT slwi TEMP, TEMP, 1 + BASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LEFT addi KK, KK, 1#endif#endif .align 4.L89:#if defined(TRMMKERNEL) && !defined(LEFT) addi KK, KK, 2#endif addi B, BO, 2 * SIZE .align 4.L90: andi. J, N, 1 beq .L999#if defined(TRMMKERNEL) && defined(LEFT) mr KK, OFFSET#endif mr CO1, C addi AO, A, -2 * SIZE li r0, FZERO lfpsx f0, SP, r0 srawi. I, M, 3 ble .L100 .align 4.L91:#if defined(TRMMKERNEL)#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) addi BO, B, - 2 * SIZE fpmr f1, f0#else slwi TEMP, KK, 3 + BASE_SHIFT slwi r0, KK, 0 + BASE_SHIFT add AO, AO, TEMP add BO, B, r0 addi BO, BO, - 2 * SIZE fpmr f1, f0#endif#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) sub TEMP, K, KK#elif defined(LEFT) addi TEMP, KK, 8#else addi TEMP, KK, 1#endif fpmr f2, f0 srawi. r0, TEMP, 2 fpmr f3, f0 mtspr CTR, r0 ble .L94#else srawi. r0, K, 2 fpmr f1, f0 addi BO, B, - 2 * SIZE fpmr f2, f0 fpmr f3, f0 mtspr CTR, r0 ble .L94#endif LFPDUX B1, BO, INC2 LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A5, AO, INC2 LFPDUX A6, AO, INC2 LFPDUX A7, AO, INC2 LFPDUX A8, AO, INC2 bdz- .L93 .align 4.L92: fxcpmadd f0, B1, A1, f0 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 LFPDUX A2, AO, INC2 fxcpmadd f2, B1, A3, f2 LFPDUX A3, AO, INC2 fxcpmadd f3, B1, A4, f3 LFPDUX A4, AO, INC2 fxcsmadd f0, B1, A5, f0 LFPDUX A5, AO, INC2 fxcsmadd f1, B1, A6, f1 LFPDUX A6, AO, INC2 fxcsmadd f2, B1, A7, f2 LFPDUX A7, AO, INC2 fxcsmadd f3, B1, A8, f3 LFPDUX A8, AO, INC2 LFPDUX B1, BO, INC2 fxcpmadd f0, B2, A1, f0 LFPDUX A1, AO, INC2 fxcpmadd f1, B2, A2, f1 LFPDUX A2, AO, INC2 fxcpmadd f2, B2, A3, f2 LFPDUX A3, AO, INC2 fxcpmadd f3, B2, A4, f3 LFPDUX A4, AO, INC2 fxcsmadd f0, B2, A5, f0 LFPDUX A5, AO, INC2 fxcsmadd f1, B2, A6, f1 LFPDUX A6, AO, INC2 fxcsmadd f2, B2, A7, f2 LFPDUX A7, AO, INC2 fxcsmadd f3, B2, A8, f3 LFPDUX A8, AO, INC2 LFPDUX B2, BO, INC2 bdnz+ .L92 .align 4.L93: fxcpmadd f0, B1, A1, f0 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 LFPDUX A2, AO, INC2 fxcpmadd f2, B1, A3, f2 LFPDUX A3, AO, INC2 fxcpmadd f3, B1, A4, f3 LFPDUX A4, AO, INC2 fxcsmadd f0, B1, A5, f0 LFPDUX A5, AO, INC2 fxcsmadd f1, B1, A6, f1 LFPDUX A6, AO, INC2 fxcsmadd f2, B1, A7, f2 LFPDUX A7, AO, INC2 fxcsmadd f3, B1, A8, f3 LFPDUX A8, AO, INC2 fxcpmadd f0, B2, A1, f0 fxcpmadd f1, B2, A2, f1 fxcpmadd f2, B2, A3, f2 fxcpmadd f3, B2, A4, f3 fxcsmadd f0, B2, A5, f0 fxcsmadd f1, B2, A6, f1 fxcsmadd f2, B2, A7, f2 fxcsmadd f3, B2, A8, f3 .align 4.L94: lfd AP, ALPHA(SP)#ifdef TRMMKERNEL fsmfp AP, AP#endif#if defined(TRMMKERNEL)#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) sub TEMP, K, KK#elif defined(LEFT) addi TEMP, KK, 8#else addi TEMP, KK, 1#endif andi. TEMP, TEMP, 3 mtspr CTR, TEMP#else andi. r0, K, 3 mtspr CTR, r0#endif ble+ .L98 LFDX B1, BO, INC2 LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 add BO, BO, INC bdz- .L97 .align 4.L96: fxcpmadd f0, B1, A1, f0 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 LFPDUX A2, AO, INC2 fxcpmadd f2, B1, A3, f2 LFPDUX A3, AO, INC2 fxcpmadd f3, B1, A4, f3 LFDX B1, BO, INC2 LFPDUX A4, AO, INC2 add BO, BO, INC bdnz+ .L96 .align 4.L97: fxcpmadd f0, B1, A1, f0 fxcpmadd f1, B1, A2, f1 fxcpmadd f2, B1, A3, f2 fxcpmadd f3, B1, A4, f3 .align 4.L98:#ifndef TRMMKERNEL LFPDUX A1, CO1, INC2 LFPDUX B1, CO1, INC2 LFPDUX A3, CO1, INC2 LFPDUX A5, CO1, INC2 fxcpmadd f0, AP, f0, A1 fxcpmadd f1, AP, f1, B1 fxcpmadd f2, AP, f2, A3 fxcpmadd f3, AP, f3, A5 STFPDUX f0, CO1, INCM7 STFPDUX f1, CO1, INC2 STFPDUX f2, CO1, INC2 STFPDUX f3, CO1, INC2#else fpmul f0, AP, f0 fpmul f1, AP, f1 fpmul f2, AP, f2 fpmul f3, AP, f3 STFPDUX f0, CO1, INC2 STFPDUX f1, CO1, INC2 STFPDUX f2, CO1, INC2 STFPDUX f3, CO1, INC2#endif#ifdef TRMMKERNEL#if ( defined(LEFT) && defined(TRANSA)) || \ (!defined(LEFT) && !defined(TRANSA)) sub TEMP, K, KK#ifdef LEFT addi TEMP, TEMP, -8#else addi TEMP, TEMP, -1#endif slwi r0, TEMP, 3 + BASE_SHIFT slwi TEMP, TEMP, 0 + BASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LEFT addi KK, KK, 8#endif#endif addic. I, I, -1 li r0, FZERO lfpsx f0, SP, r0 bgt+ .L91 .align 4.L100: andi. I, M, 4 beq .L110#if defined(TRMMKERNEL)#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) addi BO, B, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0#else slwi TEMP, KK, 2 + BASE_SHIFT slwi r0, KK, 0 + BASE_SHIFT add AO, AO, TEMP add BO, B, r0 fpmr f1, f0 addi BO, BO, - 2 * SIZE fpmr f2, f0 fpmr f3, f0#endif#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) sub TEMP, K, KK#elif defined(LEFT) addi TEMP, KK, 4#else addi TEMP, KK, 1#endif srawi. r0, TEMP, 3 mtspr CTR, r0 ble .L104#else addi BO, B, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, K, 3 mtspr CTR, r0 ble .L104#endif LFPDUX B1, BO, INC2 LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A5, AO, INC2 LFPDUX A6, AO, INC2 LFPDUX A7, AO, INC2 LFPDUX A8, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX B4, BO, INC2 bdz- .L103 .align 4.L102: fxcpmadd f0, B1, A1, f0 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 LFPDUX A2, AO, INC2 fxcsmadd f2, B1, A3, f2 LFPDUX A3, AO, INC2 fxcsmadd f3, B1, A4, f3 LFPDUX A4, AO, INC2 LFPDUX B1, BO, INC2 fxcpmadd f0, B2, A5, f0 LFPDUX A5, AO, INC2 fxcpmadd f1, B2, A6, f1 LFPDUX A6, AO, INC2 fxcsmadd f2, B2, A7, f2 LFPDUX A7, AO, INC2 fxcsmadd f3, B2, A8, f3 LFPDUX A8, AO, INC2 LFPDUX B2, BO, INC2 fxcpmadd f0, B3, A1, f0 LFPDUX A1, AO, INC2 fxcpmadd f1, B3, A2, f1 LFPDUX A2, AO, INC2 fxcsmadd f2, B3, A3, f2 LFPDUX A3, AO, INC2 fxcsmadd f3, B3, A4, f3 LFPDUX A4, AO, INC2 LFPDUX B3, BO, INC2 fxcpmadd f0, B4, A5, f0 LFPDUX A5, AO, INC2 fxcpmadd f1, B4, A6, f1 LFPDUX A6, AO, INC2 fxcsmadd f2, B4, A7, f2 LFPDUX A7, AO, INC2 fxcsmadd f3, B4, A8, f3 LFPDUX A8, AO, INC2 LFPDUX B4, BO, INC2 bdnz+ .L102 .align 4.L103: fxcpmadd f0, B1, A1, f0 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 LFPDUX A2, AO, INC2 fxcsmadd f2, B1, A3, f2 LFPDUX A3, AO, INC2 fxcsmadd f3, B1, A4, f3 LFPDUX A4, AO, INC2 fxcpmadd f0, B2, A5, f0 LFPDUX A5, AO, INC2 fxcpmadd f1, B2, A6, f1 LFPDUX A6, AO, INC2 fxcsmadd f2, B2, A7, f2 LFPDUX A7, AO, INC2 fxcsmadd f3, B2, A8, f3 LFPDUX A8, AO, INC2 fxcpmadd f0, B3, A1, f0 fxcpmadd f1, B3, A2, f1 fxcsmadd f2, B3, A3, f2 fxcsmadd f3, B3, A4, f3 fxcpmadd f0, B4, A5, f0 fxcpmadd f1, B4, A6, f1 fxcsmadd f2, B4, A7, f2 fxcsmadd f3, B4, A8, f3 .align 4.L104: lfd AP, ALPHA(SP)#ifdef TRMMKERNEL fsmfp AP, AP#endif#if defined(TRMMKERNEL)#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) sub TEMP, K, KK#elif defined(LEFT) addi TEMP, KK, 4#else addi TEMP, KK, 1#endif andi. TEMP, TEMP, 7 mtspr CTR, TEMP#else andi. r0, K, 7 mtspr CTR, r0#endif ble+ .L108 LFPDUX A1, AO, INC2 LFDX B1, BO, INC2 LFPDUX A2, AO, INC2
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -