📄 trsm_kernel_hummer_rt.s
字号:
/*
 * NOTE(review): this listing starts and ends in the middle of the kernel
 * routine.  The first "#endif" below closes a conditional that was opened
 * before the visible text, and the last "#else" branch (under .L11) is not
 * closed in the visible text.  Nothing outside the visible text has been
 * reconstructed.
 *
 * Layout: one preprocessor directive, label or instruction per line.
 * Instruction order is significant (loads are interleaved with the
 * multiply-adds) and is kept exactly as found.
 *
 * LN / LT / RN / RT are preprocessor symbols that select one variant of the
 * code; they, and all register aliases (A1.., B1.., AO, BO, CO1.., KK, TEMP,
 * AORIG, ...), are defined outside the visible text.
 * NOTE(review): from the file name these look like the four TRSM variants
 * (left/right, normal/transposed) -- confirm against the file header.
 */

/* ---- tail of a section whose beginning is outside the visible text ---- */
	fxpmul	f0, A6, f0
#endif

#ifdef LT
	/* Six pair loads from AO (two slots skipped with plain adds), then AO is rewound. */
	LFPDUX	A1, AO, INC2
	LFPDUX	A2, AO, INC2
	LFPDUX	A3, AO, INC2
	LFPDUX	A4, AO, INC2
	add	AO, AO, INC2
	LFPDUX	A5, AO, INC2
	add	AO, AO, INC2
	LFPDUX	A6, AO, INC2
	subi	AO, AO, 16 * SIZE

	fxpmul	f0, A1, f0
	fxcsnmsub	f2, A1, f0, f2
	fxcpnmsub	f1, A2, f0, f1
	fxcsnmsub	f3, A2, f0, f3
	fxsmul	f2, A3, f2
	fxcpnmsub	f1, A4, f2, f1
	fxcsnmsub	f3, A4, f2, f3
	fxpmul	f1, A5, f1
	fxcsnmsub	f3, A5, f1, f3
	fxsmul	f3, A6, f3
#endif

#ifdef RN
	LFPDUX	A1, BO, INC2
	LFPDUX	A2, BO, INC2
	subi	BO, BO, 4 * SIZE

	fxpmul	f0, A1, f0
	fxpmul	f1, A1, f1
	fxcsnmsub	f2, A1, f0, f2
	fxcsnmsub	f3, A1, f1, f3
	fxsmul	f2, A2, f2
	fxsmul	f3, A2, f3
#endif

#ifdef RT
	/* Same two loads as RN but into A2 first, then A1. */
	LFPDUX	A2, BO, INC2
	LFPDUX	A1, BO, INC2
	subi	BO, BO, 4 * SIZE

	fxsmul	f2, A1, f2
	fxsmul	f3, A1, f3
	fxcpnmsub	f0, A1, f2, f0
	fxcpnmsub	f1, A1, f3, f1
	fxpmul	f0, A2, f0
	fxpmul	f1, A2, f1
#endif

#ifdef LN
	/* LN steps the output pointers back before storing ... */
	subi	CO1, CO1, 4 * SIZE
	subi	CO2, CO2, 4 * SIZE
#endif

	/* Write f0..f3 back to the packed buffer (BO or AO) and to CO1 / CO2. */
#if defined(LN) || defined(LT)
	STFPDUX	f0, BO, INC2
	STFPDUX	f2, BO, INC2
	STFPDUX	f1, BO, INC2
	STFPDUX	f3, BO, INC2
	subi	BO, BO, 8 * SIZE

	STFDUX	f0, CO1, INC
	STFDUX	f2, CO1, INC
	STFDUX	f1, CO1, INC
	STFDUX	f3, CO1, INC
	STFSDUX	f0, CO2, INC
	STFSDUX	f2, CO2, INC
	STFSDUX	f1, CO2, INC
	STFSDUX	f3, CO2, INC
#else
	STFPDUX	f0, AO, INC2
	STFPDUX	f1, AO, INC2
	STFPDUX	f2, AO, INC2
	STFPDUX	f3, AO, INC2
	subi	AO, AO, 8 * SIZE

	STFDUX	f0, CO1, INC
	STFSDUX	f0, CO1, INC
	STFDUX	f1, CO1, INC
	STFSDUX	f1, CO1, INC
	STFDUX	f2, CO2, INC
	STFSDUX	f2, CO2, INC
	STFDUX	f3, CO2, INC
	STFSDUX	f3, CO2, INC
#endif

#ifdef LN
	/* ... and again afterwards, undoing the advance made by the stores. */
	subi	CO1, CO1, 4 * SIZE
	subi	CO2, CO2, 4 * SIZE
#endif

#ifdef RT
	/* AORIG += K << (2 + BASE_SHIFT) */
	slwi	r0, K, 2 + BASE_SHIFT
	add	AORIG, AORIG, r0
#endif

#if defined(LT) || defined(RN)
	/* Skip the remaining (K - KK) entries: AO by << (2 + BASE_SHIFT), BO by << (1 + BASE_SHIFT). */
	sub	TEMP, K, KK
	slwi	r0, TEMP, 2 + BASE_SHIFT
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LT
	addi	KK, KK, 4
#endif

#ifdef LN
	subi	KK, KK, 4
#endif

	/* Reload f0 from the stack slot at offset FZERO (presumably 0.0 -- confirm). */
	li	r0, FZERO
	lfpsx	f0, SP, r0
	.align 4

/* ---- .L70: executed only when (M & 2) != 0; results go to CO1 / CO2 ---- */
.L70:
	andi.	I, M, 2
	beq	.L80

#if defined(LT) || defined(RN)
	addi	BO, B, - 2 * SIZE
	fpmr	f1, f0
	fpmr	f2, f0
	fpmr	f3, f0

	/* CTR = KK >> 3: the loop at .L72 consumes 8 A/B pairs per pass. */
	srawi.	r0, KK, 3
	mtspr	CTR, r0
	ble	.L74
#else
#ifdef LN
	slwi	r0, K, 1 + BASE_SHIFT
	sub	AORIG, AORIG, r0
#endif

	/* Position AO and BO at offset KK within their panels; TEMP = K - KK. */
	slwi	r0, KK, 1 + BASE_SHIFT
	slwi	TEMP, KK, 1 + BASE_SHIFT
	add	AO, AORIG, r0
	add	BO, B, TEMP

	sub	TEMP, K, KK

	addi	BO, BO, - 2 * SIZE
	fpmr	f1, f0
	fpmr	f2, f0
	fpmr	f3, f0

	/* CTR = TEMP >> 3 */
	srawi.	r0, TEMP, 3
	mtspr	CTR, r0
	ble	.L74
#endif

	/* Preload 8 A pairs and 8 B pairs (A9 / A10 are used as extra B registers). */
	LFPDUX	A1, AO, INC2
	LFPDUX	B1, BO, INC2
	LFPDUX	A2, AO, INC2
	LFPDUX	B2, BO, INC2
	LFPDUX	A3, AO, INC2
	LFPDUX	B3, BO, INC2
	LFPDUX	A4, AO, INC2
	LFPDUX	B4, BO, INC2

	LFPDUX	A5, AO, INC2
	LFPDUX	B5, BO, INC2
	LFPDUX	A6, AO, INC2
	LFPDUX	B6, BO, INC2
	LFPDUX	A7, AO, INC2
	LFPDUX	A9, BO, INC2
	LFPDUX	A8, AO, INC2
	LFPDUX	A10, BO, INC2
	bdz-	.L73
	.align 4

/* Main loop: accumulate into f0/f1 and f2/f3 alternately while reloading the operands just used. */
.L72:
	fxcpmadd	f0, B1, A1, f0
	fxcsmadd	f1, B1, A1, f1
	LFPDUX	A1, AO, INC2
	LFPDUX	B1, BO, INC2
	fxcpmadd	f2, B2, A2, f2
	fxcsmadd	f3, B2, A2, f3
	LFPDUX	A2, AO, INC2
	LFPDUX	B2, BO, INC2

	fxcpmadd	f0, B3, A3, f0
	fxcsmadd	f1, B3, A3, f1
	LFPDUX	A3, AO, INC2
	LFPDUX	B3, BO, INC2
	fxcpmadd	f2, B4, A4, f2
	fxcsmadd	f3, B4, A4, f3
	LFPDUX	A4, AO, INC2
	LFPDUX	B4, BO, INC2

	fxcpmadd	f0, B5, A5, f0
	fxcsmadd	f1, B5, A5, f1
	LFPDUX	A5, AO, INC2
	LFPDUX	B5, BO, INC2
	fxcpmadd	f2, B6, A6, f2
	fxcsmadd	f3, B6, A6, f3
	LFPDUX	A6, AO, INC2
	LFPDUX	B6, BO, INC2

	fxcpmadd	f0, A9, A7, f0
	fxcsmadd	f1, A9, A7, f1
	LFPDUX	A7, AO, INC2
	LFPDUX	A9, BO, INC2
	fxcpmadd	f2, A10, A8, f2
	fxcsmadd	f3, A10, A8, f3
	LFPDUX	A8, AO, INC2
	LFPDUX	A10, BO, INC2
	bdnz+	.L72
	.align 4

/* Final pass of the unrolled loop: same arithmetic, no further loads. */
.L73:
	fxcpmadd	f0, B1, A1, f0
	fxcsmadd	f1, B1, A1, f1
	fxcpmadd	f2, B2, A2, f2
	fxcsmadd	f3, B2, A2, f3

	fxcpmadd	f0, B3, A3, f0
	fxcsmadd	f1, B3, A3, f1
	fxcpmadd	f2, B4, A4, f2
	fxcsmadd	f3, B4, A4, f3

	fxcpmadd	f0, B5, A5, f0
	fxcsmadd	f1, B5, A5, f1
	fxcpmadd	f2, B6, A6, f2
	fxcsmadd	f3, B6, A6, f3

	fxcpmadd	f0, A9, A7, f0
	fxcsmadd	f1, A9, A7, f1
	fxcpmadd	f2, A10, A8, f2
	fxcsmadd	f3, A10, A8, f3
	.align 4

/* Remainder: CTR = (KK or TEMP) & 7, one A/B pair per pass. */
.L74:
#if defined(LT) || defined(RN)
	andi.	r0, KK, 7
	mtspr	CTR, r0
	ble+	.L78
#else
	andi.	r0, TEMP, 7
	mtspr	CTR, r0
	ble+	.L78
#endif

	LFPDUX	A1, AO, INC2
	LFPDUX	B1, BO, INC2
	bdz-	.L77
	.align 4

.L76:
	fxcpmadd	f0, B1, A1, f0
	fxcsmadd	f1, B1, A1, f1
	LFPDUX	A1, AO, INC2
	LFPDUX	B1, BO, INC2
	bdnz+	.L76
	.align 4

.L77:
	fxcpmadd	f0, B1, A1, f0
	fxcsmadd	f1, B1, A1, f1
	.align 4

/* Fold the two accumulator pairs together, then update and store the result. */
.L78:
	fpadd	f0, f0, f2
	fpadd	f1, f1, f3

#if defined(LN) || defined(RT)
	/* Both branches subtract 2 here; kept as two branches exactly as found. */
#ifdef LN
	subi	r0, KK, 2
#else
	subi	r0, KK, 2
#endif
	slwi	TEMP, r0, 1 + BASE_SHIFT
	slwi	r0, r0, 1 + BASE_SHIFT
	add	AO, AORIG, TEMP
	add	BO, B, r0
	addi	BO, BO, - 2 * SIZE
#endif

#if defined(LN) || defined(LT)
	/* Exchange halves between f0 and f1 (f24 is the temporary) before subtracting from the values at BO. */
	fpmr	f24, f0
	fsmfp	f0, f1
	fsmtp	f1, f24

	LFPDUX	f16, BO, INC2
	LFPDUX	f17, BO, INC2

	subi	BO, BO, 4 * SIZE

	fpsub	f0, f16, f0
	fpsub	f1, f17, f1
#else
	LFPDUX	f16, AO, INC2
	LFPDUX	f17, AO, INC2

	subi	AO, AO, 4 * SIZE

	fpsub	f0, f16, f0
	fpsub	f1, f17, f1
#endif

#ifdef LN
	LFPDUX	A1, AO, INC2
	LFPDUX	A2, AO, INC2

	addi	AO, AO, -4 * SIZE

	fxsmul	f1, A2, f1
	fxcpnmsub	f0, A2, f1, f0
	fxpmul	f0, A1, f0
#endif

#ifdef LT
	LFPDUX	A1, AO, INC2
	LFPDUX	A2, AO, INC2

	addi	AO, AO, -4 * SIZE

	fxpmul	f0, A1, f0
	fxcsnmsub	f1, A1, f0, f1
	fxsmul	f1, A2, f1
#endif

#ifdef RN
	LFPDUX	A1, BO, INC2
	LFPDUX	A2, BO, INC2

	subi	BO, BO, 4 * SIZE

	fxpmul	f0, A1, f0
	fxcsnmsub	f1, A1, f0, f1
	fxsmul	f1, A2, f1
#endif

#ifdef RT
	LFPDUX	A2, BO, INC2
	LFPDUX	A1, BO, INC2

	subi	BO, BO, 4 * SIZE

	fxsmul	f1, A1, f1
	fxcpnmsub	f0, A1, f1, f0
	fxpmul	f0, A2, f0
#endif

#ifdef LN
	subi	CO1, CO1, 2 * SIZE
	subi	CO2, CO2, 2 * SIZE
#endif

#if defined(LN) || defined(LT)
	STFPDUX	f0, BO, INC2
	STFPDUX	f1, BO, INC2

	subi	BO, BO, 4 * SIZE

	STFDUX	f0, CO1, INC
	STFDUX	f1, CO1, INC
	STFSDUX	f0, CO2, INC
	STFSDUX	f1, CO2, INC
#else
	STFPDUX	f0, AO, INC2
	STFPDUX	f1, AO, INC2

	subi	AO, AO, 4 * SIZE

	STFDUX	f0, CO1, INC
	STFSDUX	f0, CO1, INC
	STFDUX	f1, CO2, INC
	STFSDUX	f1, CO2, INC
#endif

#ifdef LN
	subi	CO1, CO1, 2 * SIZE
	subi	CO2, CO2, 2 * SIZE
#endif

#ifdef RT
	slwi	r0, K, 1 + BASE_SHIFT
	add	AORIG, AORIG, r0
#endif

#if defined(LT) || defined(RN)
	sub	TEMP, K, KK
	slwi	r0, TEMP, 1 + BASE_SHIFT
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LT
	addi	KK, KK, 2
#endif

#ifdef LN
	subi	KK, KK, 2
#endif

	li	r0, FZERO
	lfpsx	f0, SP, r0
	.align 4

/* ---- .L80: executed only when (M & 1) != 0; results go to CO1 / CO2 ---- */
.L80:
	andi.	I, M, 1
	beq	.L89

#if defined(LT) || defined(RN)
	addi	BO, B, - 2 * SIZE
	fpmr	f1, f0
	fpmr	f2, f0
	fpmr	f3, f0

	/* CTR = KK >> 3 */
	srawi.	r0, KK, 3
	mtspr	CTR, r0
	ble	.L84
#else
#ifdef LN
	slwi	r0, K, 0 + BASE_SHIFT
	sub	AORIG, AORIG, r0
#endif

	/* AO advances by KK << (0 + BASE_SHIFT), BO by KK << (1 + BASE_SHIFT). */
	slwi	r0, KK, 0 + BASE_SHIFT
	slwi	TEMP, KK, 1 + BASE_SHIFT
	add	AO, AORIG, r0
	add	BO, B, TEMP

	sub	TEMP, K, KK

	addi	BO, BO, - 2 * SIZE
	fpmr	f1, f0
	fpmr	f2, f0
	fpmr	f3, f0

	/* CTR = TEMP >> 3 */
	srawi.	r0, TEMP, 3
	mtspr	CTR, r0
	ble	.L84
#endif

	/* Preload 4 A pairs and 4 B pairs. */
	LFPDUX	B1, BO, INC2
	LFPDUX	A1, AO, INC2
	LFPDUX	A2, AO, INC2
	LFPDUX	B2, BO, INC2
	LFPDUX	A3, AO, INC2
	LFPDUX	A4, AO, INC2

	LFPDUX	B3, BO, INC2
	LFPDUX	B4, BO, INC2
	bdz-	.L83
	.align 4

/* Main loop: each A pair is used against two B pairs; B registers are reloaded twice per pass. */
.L82:
	fxcpmadd	f0, A1, B1, f0
	LFPDUX	B1, BO, INC2
	fxcsmadd	f1, A1, B2, f1
	LFPDUX	B2, BO, INC2
	LFPDUX	A1, AO, INC2
	fxcpmadd	f2, A2, B3, f2
	LFPDUX	B3, BO, INC2
	fxcsmadd	f3, A2, B4, f3
	LFPDUX	B4, BO, INC2
	LFPDUX	A2, AO, INC2

	fxcpmadd	f0, A3, B1, f0
	LFPDUX	B1, BO, INC2
	fxcsmadd	f1, A3, B2, f1
	LFPDUX	B2, BO, INC2
	LFPDUX	A3, AO, INC2
	fxcpmadd	f2, A4, B3, f2
	LFPDUX	B3, BO, INC2
	fxcsmadd	f3, A4, B4, f3
	LFPDUX	B4, BO, INC2
	LFPDUX	A4, AO, INC2
	bdnz+	.L82
	.align 4

/* Final pass: still reloads B1..B4 once, but performs no A loads. */
.L83:
	fxcpmadd	f0, A1, B1, f0
	LFPDUX	B1, BO, INC2
	fxcsmadd	f1, A1, B2, f1
	LFPDUX	B2, BO, INC2
	fxcpmadd	f2, A2, B3, f2
	LFPDUX	B3, BO, INC2
	fxcsmadd	f3, A2, B4, f3
	LFPDUX	B4, BO, INC2

	fxcpmadd	f0, A3, B1, f0
	fxcsmadd	f1, A3, B2, f1
	fxcpmadd	f2, A4, B3, f2
	fxcsmadd	f3, A4, B4, f3
	.align 4

/* Remainder: CTR = (KK or TEMP) & 7. */
.L84:
#if defined(LT) || defined(RN)
	andi.	r0, KK, 7
	mtspr	CTR, r0
	ble+	.L88
#else
	andi.	r0, TEMP, 7
	mtspr	CTR, r0
	ble+	.L88
#endif

	/* LFDX does not update AO, so AO is advanced by INC with an explicit add. */
	LFDX	A1, AO, INC2
	LFPDUX	B1, BO, INC2
	add	AO, AO, INC
	bdz-	.L87
	.align 4

.L86:
	fxcpmadd	f0, A1, B1, f0
	LFDX	A1, AO, INC2
	LFPDUX	B1, BO, INC2
	add	AO, AO, INC
	bdnz+	.L86
	.align 4

.L87:
	fxcpmadd	f0, A1, B1, f0
	.align 4

/* Reduce the four accumulators into f0, then update and store the result. */
.L88:
	fpadd	f0, f0, f1
	fpadd	f2, f2, f3
	fpadd	f0, f0, f2

#if defined(LN) || defined(RT)
#ifdef LN
	subi	r0, KK, 1
#else
	subi	r0, KK, 2
#endif
	slwi	TEMP, r0, 0 + BASE_SHIFT
	slwi	r0, r0, 1 + BASE_SHIFT
	add	AO, AORIG, TEMP
	add	BO, B, r0
	addi	BO, BO, - 2 * SIZE
#endif

#if defined(LN) || defined(LT)
	LFPDX	f16, BO, INC2

	fpsub	f0, f16, f0
#else
	LFPDX	f16, AO, INC2

	fpsub	f0, f16, f0
#endif

#ifdef LN
	LFPDX	A1, AO, INC2

	fxpmul	f0, A1, f0
#endif

#ifdef LT
	LFPDX	A1, AO, INC2

	fxpmul	f0, A1, f0
#endif

#ifdef RN
	/* Scalar path: three coefficients are read from BO at element offsets 2+0, 2+1 and 2+3. */
	LFD	A1, (2 + 0) * SIZE(BO)
	LFD	A2, (2 + 1) * SIZE(BO)
	LFD	A3, (2 + 3) * SIZE(BO)

	fsmtp	f1, f0

	fmul	f0, A1, f0
	fnmsub	f1, A2, f0, f1

	fmul	f1, A3, f1
	fsmfp	f0, f1
#endif

#ifdef RT
	/* Scalar path: coefficients are read at element offsets 2+3, 2+2 and 2+0. */
	LFD	A1, (2 + 3) * SIZE(BO)
	LFD	A2, (2 + 2) * SIZE(BO)
	LFD	A3, (2 + 0) * SIZE(BO)

	fsmtp	f1, f0

	fmul	f1, A1, f1
	fnmsub	f0, A2, f1, f0

	fmul	f0, A3, f0
	fsmfp	f0, f1
#endif

#ifdef LN
	subi	CO1, CO1, 1 * SIZE
	subi	CO2, CO2, 1 * SIZE
#endif

#if defined(LN) || defined(LT)
	STFPDX	f0, BO, INC2

	STFDUX	f0, CO1, INC
	STFSDUX	f0, CO2, INC
#else
	STFPDX	f0, AO, INC2

	/* Here the CO2 value is taken from f1 rather than from the second half of f0. */
	STFDUX	f0, CO1, INC
	STFDUX	f1, CO2, INC
#endif

#ifdef LN
	subi	CO1, CO1, 1 * SIZE
	subi	CO2, CO2, 1 * SIZE
#endif

#ifdef RT
	slwi	r0, K, 0 + BASE_SHIFT
	add	AORIG, AORIG, r0
#endif

#if defined(LT) || defined(RN)
	sub	TEMP, K, KK
	slwi	r0, TEMP, 0 + BASE_SHIFT
	slwi	TEMP, TEMP, 1 + BASE_SHIFT
	add	AO, AO, r0
	add	BO, BO, TEMP
#endif

#ifdef LT
	addi	KK, KK, 1
#endif

#ifdef LN
	subi	KK, KK, 1
#endif
	.align 4

/* ---- .L89: bookkeeping after the CO1 / CO2 sections: move B on, adjust KK by 2 ---- */
.L89:
#ifdef LN
	slwi	r0, K, 1 + BASE_SHIFT
	add	B, B, r0
#endif

#if defined(LT) || defined(RN)
	addi	B, BO, 2 * SIZE
#endif

#ifdef RN
	addi	KK, KK, 2
#endif

#ifdef RT
	subi	KK, KK, 2
#endif
	.align 4

/* ---- .L90: J = N >> 2; branch to .L999 (outside the visible text) when J <= 0 ---- */
.L90:
	srawi.	J, N, 2
	ble	.L999
	.align 4

/* ---- .L10: set up a group of four output pointers CO1..CO4, LDC apart ---- */
.L10:
#ifdef RT
	/* RT first moves B and C backwards: B by K << (2 + BASE_SHIFT), C by LDC << 2. */
	slwi	r0, K, 2 + BASE_SHIFT
	sub	B, B, r0

	slwi	r0, LDC, 2
	sub	C, C, r0
#endif

	mr	CO1, C
	add	CO2, C, LDC
	add	CO3, CO2, LDC
	add	CO4, CO3, LDC

#ifdef LN
	add	KK, M, OFFSET
#endif

#ifdef LT
	mr	KK, OFFSET
#endif

#if defined(LN) || defined(RT)
	addi	AORIG, A, -4 * SIZE
#else
	addi	AO, A, -4 * SIZE
#endif
#ifndef RT
	/* All variants except RT move C forward past the four pointers just set up. */
	add	C, CO4, LDC
#endif

	li	r0, FZERO
	lfpsx	f0, SP, r0

	/* I = M >> 3; branch to .L20 (outside the visible text) when I <= 0. */
	srawi.	I, M, 3
	ble	.L20
	.align 4

/* ---- .L11: copy f0 into f1..f15 and position AO/AO2/BO/BO2; CTR = (KK or TEMP) >> 2 ---- */
.L11:
#if defined(LT) || defined(RN)
	addi	AO2, AO, 2 * SIZE
	fpmr	f4, f0
	addi	BO, B, - 4 * SIZE
	fpmr	f8, f0
	addi	BO2, B, - 2 * SIZE
	fpmr	f12, f0

	fpmr	f5, f0
	fpmr	f9, f0
	fpmr	f13, f0
	fpmr	f2, f0

	fpmr	f6, f0
	fpmr	f10, f0
	fpmr	f14, f0
	fpmr	f3, f0

	fpmr	f7, f0
	fpmr	f11, f0
	fpmr	f15, f0
	nop

	srawi.	r0, KK, 2
	fpmr	f1, f0
	mtspr	CTR, r0
	ble	.L14
#else

#ifdef LN
	slwi	r0, K, 3 + BASE_SHIFT
	sub	AORIG, AORIG, r0
#endif

	slwi	r0, KK, 3 + BASE_SHIFT
	slwi	TEMP, KK, 2 + BASE_SHIFT
	add	AO, AORIG, r0
	add	BO, B, TEMP

	sub	TEMP, K, KK

	addi	AO2, AO, 2 * SIZE
	fpmr	f4, f0
	addi	BO, BO, - 4 * SIZE
	fpmr	f8, f0
	addi	BO2, BO, 2 * SIZE
	fpmr	f12, f0

	fpmr	f5, f0
	fpmr	f9, f0
	fpmr	f13, f0
	fpmr	f2, f0

	fpmr	f6, f0
	fpmr	f10, f0
	fpmr	f14, f0
	fpmr	f3, f0

	fpmr	f7, f0
	fpmr	f11, f0
	fpmr	f15, f0
	nop

	srawi.	r0, TEMP, 2
	fpmr	f1, f0
/* NOTE(review): the visible text stops here, inside the #else branch above. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -