📄 trsm_kernel_hummer_ln.s
字号:
/*
 * NOTE(review): this excerpt starts inside a conditional section whose
 * opening #if/#ifdef lies above the visible text, and it stops inside the
 * setup that follows .L11.  The text has been reflowed to one statement per
 * line; no instruction, operand or directive was changed.
 *
 * Names such as A1..A10, B1..B6, AO, AO2, BO, BO2, AORIG, CO1..CO4, K, KK,
 * M, I, TEMP, INC, INC4, INCM4, SIZE, BASE_SHIFT and FZERO are defined
 * outside this excerpt; the upper-case LFPDUX / STFPDUX / STFDUX / STFSDUX
 * are presumably macros for the corresponding update-form load/store
 * instructions -- confirm against the file header.
 */

/* ---- Tail of the preceding tile (its beginning is not visible here) ---- */
	fxcsnmsub f4, A1, f0, f4
	fxcsnmsub f12, A1, f8, f12
	fxsmul f4, A2, f4
	fxsmul f12, A2, f12
#endif
#ifdef RN
	/*
	 * RN arm: fetch A1..A6 through BO/BO2 (BO is stepped twice without a
	 * load), rewind both pointers by 16 * SIZE, then update the result
	 * registers in the order f0, f4, f8, f12.
	 */
	LFPDUX A1, BO, INC4
	LFPDUX A2, BO2, INC4
	LFPDUX A3, BO, INC4
	LFPDUX A4, BO2, INC4
	add BO, BO, INC4
	LFPDUX A5, BO2, INC4
	add BO, BO, INC4
	LFPDUX A6, BO2, INC4
	subi BO, BO, 16 * SIZE
	subi BO2, BO2, 16 * SIZE
	fxpmul f0, A1, f0
	fxcsnmsub f4, A1, f0, f4
	fxcpnmsub f8, A2, f0, f8
	fxcsnmsub f12, A2, f0, f12
	fxsmul f4, A3, f4
	fxcpnmsub f8, A4, f4, f8
	fxcsnmsub f12, A4, f4, f12
	fxpmul f8, A5, f8
	fxcsnmsub f12, A5, f8, f12
	fxsmul f12, A6, f12
#endif
#ifdef RT
	/*
	 * RT arm: start 20 * SIZE past BO/BO2 and fetch A1..A6 while stepping
	 * by INCM4, then update the result registers in the order
	 * f12, f8, f4, f0 (the reverse of the RN arm).
	 */
	addi BO, BO, 20 * SIZE
	addi BO2, BO2, 20 * SIZE
	LFPDUX A1, BO2, INCM4
	LFPDUX A2, BO, INCM4
	LFPDUX A3, BO2, INCM4
	LFPDUX A4, BO, INCM4
	add BO2, BO2, INCM4
	LFPDUX A5, BO, INCM4
	add BO2, BO2, INCM4
	LFPDUX A6, BO, INCM4
	subi BO, BO, 4 * SIZE
	subi BO2, BO2, 4 * SIZE
	fxsmul f12, A1, f12
	fxcpnmsub f8, A1, f12, f8
	fxcsnmsub f4, A2, f12, f4
	fxcpnmsub f0, A2, f12, f0
	fxpmul f8, A3, f8
	fxcsnmsub f4, A4, f8, f4
	fxcpnmsub f0, A4, f8, f0
	fxsmul f4, A5, f4
	fxcpnmsub f0, A5, f4, f0
	fxpmul f0, A6, f0
#endif
#ifdef LN
	/* LN: step the four C pointers back by 2 * SIZE before storing. */
	subi CO1, CO1, 2 * SIZE
	subi CO2, CO2, 2 * SIZE
	subi CO3, CO3, 2 * SIZE
	subi CO4, CO4, 2 * SIZE
#endif
#if defined(LN) || defined(LT)
	/*
	 * LN/LT: store f0, f8, f4, f12 through BO/BO2, rewind those pointers
	 * by 8 * SIZE, then store through CO1..CO4 (STFDUX to CO1/CO3,
	 * STFSDUX to CO2/CO4).
	 */
	STFPDUX f0, BO, INC4
	STFPDUX f8, BO2, INC4
	STFPDUX f4, BO, INC4
	STFPDUX f12, BO2, INC4
	subi BO, BO, 8 * SIZE
	subi BO2, BO2, 8 * SIZE
	STFDUX f0, CO1, INC
	STFDUX f4, CO1, INC
	STFSDUX f0, CO2, INC
	STFSDUX f4, CO2, INC
	STFDUX f8, CO3, INC
	STFDUX f12, CO3, INC
	STFSDUX f8, CO4, INC
	STFSDUX f12, CO4, INC
#else
	/*
	 * Other variants: store f0, f4, f8, f12 through AO/AO2, rewind by
	 * 8 * SIZE, then store each register to one C pointer with an
	 * STFDUX / STFSDUX pair.
	 */
	STFPDUX f0, AO, INC4
	STFPDUX f4, AO2, INC4
	STFPDUX f8, AO, INC4
	STFPDUX f12, AO2, INC4
	subi AO, AO, 8 * SIZE
	subi AO2, AO2, 8 * SIZE
	STFDUX f0, CO1, INC
	STFSDUX f0, CO1, INC
	STFDUX f4, CO2, INC
	STFSDUX f4, CO2, INC
	STFDUX f8, CO3, INC
	STFSDUX f8, CO3, INC
	STFDUX f12, CO4, INC
	STFSDUX f12, CO4, INC
#endif
#ifdef LN
	/* LN: step the C pointers back by 2 * SIZE again after the stores. */
	subi CO1, CO1, 2 * SIZE
	subi CO2, CO2, 2 * SIZE
	subi CO3, CO3, 2 * SIZE
	subi CO4, CO4, 2 * SIZE
#endif
#ifdef RT
	/* RT: AORIG += K << (1 + BASE_SHIFT). */
	slwi r0, K, 1 + BASE_SHIFT
	add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
	/*
	 * LT/RN: with TEMP = K - KK,
	 *   AO += TEMP << (1 + BASE_SHIFT), BO += TEMP << (2 + BASE_SHIFT).
	 */
	sub TEMP, K, KK
	slwi r0, TEMP, 1 + BASE_SHIFT
	slwi TEMP, TEMP, 2 + BASE_SHIFT
	add AO, AO, r0
	add BO, BO, TEMP
#endif
#ifdef LT
	addi KK, KK, 2
#endif
#ifdef LN
	subi KK, KK, 2
#endif
	/*
	 * Reload f0 from SP + FZERO; f0 is copied into the accumulators at the
	 * start of the next tile (presumably a stored zero -- confirm where
	 * FZERO is initialised).
	 */
	li r0, FZERO
	lfpsx f0, SP, r0
	.align 4

/*
 * .L30: tile selected by bit 2 of M.  Skipped (branch to .L40) when
 * (M & 4) == 0.  Accumulates into f0, f4, f8, f12, f1, f5, f9, f13 in
 * .L22 - .L27, then finishes and stores the tile from .L28 onwards.
 */
.L30:
	andi. I, M, 4
	beq .L40
#if defined(LT) || defined(RN)
	/*
	 * LT/RN setup: AO2 = AO + 2 * SIZE, BO = B - 4 * SIZE,
	 * BO2 = B - 2 * SIZE; copy f0 into the other seven accumulators;
	 * CTR = KK >> 2, and go straight to .L24 when that count is <= 0.
	 */
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, B, - 4 * SIZE
	fpmr f8, f0
	addi BO2, B, - 2 * SIZE
	fpmr f12, f0
	srawi. r0, KK, 2
	fpmr f1, f0
	fpmr f5, f0
	fpmr f9, f0
	mtspr CTR, r0
	fpmr f13, f0
	ble .L24
#else
#ifdef LN
	/* LN: AORIG -= K << (2 + BASE_SHIFT). */
	slwi r0, K, 2 + BASE_SHIFT
	sub AORIG, AORIG, r0
#endif
	/*
	 * LN/RT setup: AO = AORIG + (KK << (2 + BASE_SHIFT)),
	 * BO = B + (KK << (2 + BASE_SHIFT)) - 4 * SIZE, AO2 = AO + 2 * SIZE,
	 * BO2 = BO + 2 * SIZE; TEMP = K - KK; CTR = TEMP >> 2, and go
	 * straight to .L24 when that count is <= 0.
	 */
	slwi r0 , KK, 2 + BASE_SHIFT
	slwi TEMP, KK, 2 + BASE_SHIFT
	add AO, AORIG, r0
	add BO, B, TEMP
	sub TEMP, K, KK
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, BO, - 4 * SIZE
	fpmr f8, f0
	addi BO2, BO, 2 * SIZE
	fpmr f12, f0
	srawi. r0, TEMP, 2
	fpmr f1, f0
	fpmr f5, f0
	fpmr f9, f0
	mtspr CTR, r0
	fpmr f13, f0
	ble .L24
#endif
	/*
	 * Preload the operands of the first pass.  A8 is not loaded here; it
	 * is loaded inside .L22 / .L23 before its first use.
	 */
	LFPDUX A1, AO, INC4
	LFPDUX B1, BO, INC4
	LFPDUX A2, AO2, INC4
	LFPDUX B2, BO2, INC4
	LFPDUX A3, AO, INC4
	LFPDUX B3, BO, INC4
	LFPDUX A4, AO2, INC4
	LFPDUX B4, BO2, INC4
	LFPDUX A5, AO, INC4
	LFPDUX B5, BO, INC4
	LFPDUX A6, AO2, INC4
	LFPDUX B6, BO2, INC4
	LFPDUX A7, AO, INC4
	LFPDUX A9, BO, INC4
	LFPDUX A10, BO2, INC4
	/* Decrement CTR; when it reaches zero only the final pass (.L23) runs. */
	bdz- .L23
	.align 4

/*
 * .L22: main loop.  Each pass issues four groups of eight multiply-adds
 * (operand sets B1/B2 x A1/A2, B3/B4 x A3/A4, B5/B6 x A5/A6,
 * A9/A10 x A7/A8), matching the count >> 2 placed in CTR.  The loads
 * fetch A8 for the current pass and every other operand for the next one.
 * NOTE(review): the nop instructions are presumably issue-slot padding --
 * confirm before removing any.
 */
.L22:
	fxcpmadd f0, B1, A1, f0
	nop
	fxcsmadd f4, B1, A1, f4
	LFPDUX A8, AO2, INC4
	fxcpmadd f8, B2, A1, f8
	nop
	fxcsmadd f12, B2, A1, f12
	LFPDUX A1, AO, INC4
	fxcpmadd f1, B1, A2, f1
	nop
	fxcsmadd f5, B1, A2, f5
	LFPDUX B1, BO, INC4
	fxcpmadd f9, B2, A2, f9
	nop
	fxcsmadd f13, B2, A2, f13
	LFPDUX B2, BO2, INC4
	fxcpmadd f0, B3, A3, f0
	nop
	fxcsmadd f4, B3, A3, f4
	LFPDUX A2, AO2, INC4
	fxcpmadd f8, B4, A3, f8
	nop
	fxcsmadd f12, B4, A3, f12
	LFPDUX A3, AO, INC4
	fxcpmadd f1, B3, A4, f1
	nop
	fxcsmadd f5, B3, A4, f5
	LFPDUX B3, BO, INC4
	fxcpmadd f9, B4, A4, f9
	nop
	fxcsmadd f13, B4, A4, f13
	LFPDUX B4, BO2, INC4
	fxcpmadd f0, B5, A5, f0
	nop
	fxcsmadd f4, B5, A5, f4
	LFPDUX A4, AO2, INC4
	fxcpmadd f8, B6, A5, f8
	nop
	fxcsmadd f12, B6, A5, f12
	LFPDUX A5, AO, INC4
	fxcpmadd f1, B5, A6, f1
	nop
	fxcsmadd f5, B5, A6, f5
	LFPDUX B5, BO, INC4
	fxcpmadd f9, B6, A6, f9
	nop
	fxcsmadd f13, B6, A6, f13
	LFPDUX B6, BO2, INC4
	fxcpmadd f0, A9, A7, f0
	nop
	fxcsmadd f4, A9, A7, f4
	LFPDUX A6, AO2, INC4
	fxcpmadd f8, A10, A7, f8
	nop
	fxcsmadd f12, A10, A7, f12
	LFPDUX A7, AO, INC4
	fxcpmadd f1, A9, A8, f1
	nop
	fxcsmadd f5, A9, A8, f5
	LFPDUX A9, BO, INC4
	fxcpmadd f9, A10, A8, f9
	nop
	fxcsmadd f13, A10, A8, f13
	LFPDUX A10, BO2, INC4
	bdnz+ .L22
	.align 4

/*
 * .L23: final pass of the main loop.  Same 32 multiply-adds as .L22, but
 * the only load is A8; nothing is fetched for a further pass.
 */
.L23:
	fxcpmadd f0, B1, A1, f0
	fxcsmadd f4, B1, A1, f4
	LFPDUX A8, AO2, INC4
	fxcpmadd f8, B2, A1, f8
	fxcsmadd f12, B2, A1, f12
	fxcpmadd f1, B1, A2, f1
	fxcsmadd f5, B1, A2, f5
	fxcpmadd f9, B2, A2, f9
	fxcsmadd f13, B2, A2, f13
	fxcpmadd f0, B3, A3, f0
	fxcsmadd f4, B3, A3, f4
	fxcpmadd f8, B4, A3, f8
	fxcsmadd f12, B4, A3, f12
	fxcpmadd f1, B3, A4, f1
	fxcsmadd f5, B3, A4, f5
	fxcpmadd f9, B4, A4, f9
	fxcsmadd f13, B4, A4, f13
	fxcpmadd f0, B5, A5, f0
	fxcsmadd f4, B5, A5, f4
	fxcpmadd f8, B6, A5, f8
	fxcsmadd f12, B6, A5, f12
	fxcpmadd f1, B5, A6, f1
	fxcsmadd f5, B5, A6, f5
	fxcpmadd f9, B6, A6, f9
	fxcsmadd f13, B6, A6, f13
	fxcpmadd f0, A9, A7, f0
	fxcsmadd f4, A9, A7, f4
	fxcpmadd f8, A10, A7, f8
	fxcsmadd f12, A10, A7, f12
	fxcpmadd f1, A9, A8, f1
	fxcsmadd f5, A9, A8, f5
	fxcpmadd f9, A10, A8, f9
	fxcsmadd f13, A10, A8, f13
	.align 4

/*
 * .L24: remainder of the accumulation.  CTR = count & 3, where count is KK
 * (LT/RN) or TEMP (otherwise).  The masked value is never negative, so the
 * ble is taken only when it is zero.
 */
.L24:
#if defined(LT) || defined(RN)
	andi. r0, KK, 3
	mtspr CTR, r0
	ble+ .L28
#else
	andi. r0, TEMP, 3
	mtspr CTR, r0
	ble+ .L28
#endif
	/* Preload one operand set; a single remaining step goes to .L27. */
	LFPDUX A1, AO, INC4
	LFPDUX A2, AO2, INC4
	LFPDUX B1, BO, INC4
	LFPDUX B2, BO2, INC4
	bdz- .L27
	.align 4

/* .L26: one group of eight multiply-adds per pass, loading the next set. */
.L26:
	fxcpmadd f0, B1, A1, f0
	fxcsmadd f4, B1, A1, f4
	fxcpmadd f8, B2, A1, f8
	fxcsmadd f12, B2, A1, f12
	LFPDUX A1, AO, INC4
	fxcpmadd f1, B1, A2, f1
	fxcsmadd f5, B1, A2, f5
	LFPDUX B1, BO, INC4
	fxcpmadd f9, B2, A2, f9
	fxcsmadd f13, B2, A2, f13
	LFPDUX A2, AO2, INC4
	LFPDUX B2, BO2, INC4
	bdnz+ .L26
	.align 4

/* .L27: last remainder step; same eight multiply-adds, no further loads. */
.L27:
	fxcpmadd f0, B1, A1, f0
	fxcsmadd f4, B1, A1, f4
	fxcpmadd f8, B2, A1, f8
	fxcsmadd f12, B2, A1, f12
	fxcpmadd f1, B1, A2, f1
	fxcsmadd f5, B1, A2, f5
	fxcpmadd f9, B2, A2, f9
	fxcsmadd f13, B2, A2, f13
	.align 4

/*
 * .L28: combine the accumulators with the stored values, apply the
 * variant-specific update sequence, and write the tile back.
 */
.L28:
#if defined(LN) || defined(RT)
	/*
	 * LN/RT: reposition at offset (KK - 4) << (2 + BASE_SHIFT):
	 *   AO = AORIG + offset, BO = B + offset - 4 * SIZE,
	 *   AO2 = AO + 2 * SIZE, BO2 = BO + 2 * SIZE.
	 * Both arms of the inner #ifdef below are identical.
	 */
#ifdef LN
	subi r0, KK, 4
#else
	subi r0, KK, 4
#endif
	slwi TEMP, r0, 2 + BASE_SHIFT
	slwi r0, r0, 2 + BASE_SHIFT
	add AO, AORIG, TEMP
	add BO, B, r0
	addi AO2, AO, 2 * SIZE
	addi BO, BO, - 4 * SIZE
	addi BO2, BO, 2 * SIZE
#endif
#if defined(LN) || defined(LT)
	/*
	 * LN/LT: exchange register halves between the pairs (f0,f4), (f1,f5),
	 * (f8,f12), (f9,f13), using f24/f25/f28/f29 as saved copies.
	 * NOTE(review): this looks like a 2x2 transpose of each pair --
	 * confirm against the fsmfp/fsmtp definitions.
	 */
	fpmr f24, f0
	fpmr f25, f1
	fpmr f28, f8
	fpmr f29, f9
	fsmfp f0, f4
	fsmfp f1, f5
	fsmfp f8, f12
	fsmfp f9, f13
	fsmtp f4, f24
	fsmtp f5, f25
	fsmtp f12, f28
	fsmtp f13, f29
	/*
	 * Load eight register pairs through BO/BO2, rewind both pointers by
	 * 16 * SIZE, then replace each accumulator with (loaded - accumulator).
	 */
	LFPDUX f16, BO, INC4
	LFPDUX f17, BO2, INC4
	LFPDUX f18, BO, INC4
	LFPDUX f19, BO2, INC4
	LFPDUX f20, BO, INC4
	LFPDUX f21, BO2, INC4
	LFPDUX f22, BO, INC4
	LFPDUX f23, BO2, INC4
	subi BO, BO, 16 * SIZE
	subi BO2, BO2, 16 * SIZE
	fpsub f0, f16, f0
	fpsub f8, f17, f8
	fpsub f4, f18, f4
	fpsub f12, f19, f12
	fpsub f1, f20, f1
	fpsub f9, f21, f9
	fpsub f5, f22, f5
	fpsub f13, f23, f13
#else
	/*
	 * RN/RT: same subtraction, but the eight pairs are loaded through
	 * AO/AO2 and no half exchange is performed first.
	 */
	LFPDUX f16, AO, INC4
	LFPDUX f17, AO2, INC4
	LFPDUX f18, AO, INC4
	LFPDUX f19, AO2, INC4
	LFPDUX f20, AO, INC4
	LFPDUX f21, AO2, INC4
	LFPDUX f22, AO, INC4
	LFPDUX f23, AO2, INC4
	subi AO, AO, 16 * SIZE
	subi AO2, AO2, 16 * SIZE
	fpsub f0, f16, f0
	fpsub f1, f17, f1
	fpsub f4, f18, f4
	fpsub f5, f19, f5
	fpsub f8, f20, f8
	fpsub f9, f21, f9
	fpsub f12, f22, f12
	fpsub f13, f23, f13
#endif
#ifdef LN
	/*
	 * LN arm: fetch A1..A6 through AO/AO2 starting 20 * SIZE ahead and
	 * stepping by INCM4; registers are finalised in the order
	 * f5/f13, f1/f9, f4/f12, f0/f8.
	 */
	addi AO, AO, 20 * SIZE
	addi AO2, AO2, 20 * SIZE
	LFPDUX A1, AO2, INCM4
	LFPDUX A2, AO, INCM4
	LFPDUX A3, AO2, INCM4
	LFPDUX A4, AO, INCM4
	add AO2, AO2, INCM4
	LFPDUX A5, AO, INCM4
	add AO2, AO2, INCM4
	LFPDUX A6, AO, INCM4
	addi AO, AO, -4 * SIZE
	addi AO2, AO2, -4 * SIZE
	fxsmul f5, A1, f5
	fxsmul f13, A1, f13
	fxcpnmsub f1, A1, f5, f1
	fxcpnmsub f9, A1, f13, f9
	fxcsnmsub f4, A2, f5, f4
	fxcsnmsub f12, A2, f13, f12
	fxcpnmsub f0, A2, f5, f0
	fxcpnmsub f8, A2, f13, f8
	fxpmul f1, A3, f1
	fxpmul f9, A3, f9
	fxcsnmsub f4, A4, f1, f4
	fxcsnmsub f12, A4, f9, f12
	fxcpnmsub f0, A4, f1, f0
	fxcpnmsub f8, A4, f9, f8
	fxsmul f4, A5, f4
	fxsmul f12, A5, f12
	fxcpnmsub f0, A5, f4, f0
	fxcpnmsub f8, A5, f12, f8
	fxpmul f0, A6, f0
	fxpmul f8, A6, f8
#endif
#ifdef LT
	/*
	 * LT arm: fetch A1..A6 through AO/AO2 stepping by INC4 (AO is stepped
	 * twice without a load), rewind by 16 * SIZE; registers are finalised
	 * in the order f0/f8, f4/f12, f1/f9, f5/f13.
	 */
	LFPDUX A1, AO, INC4
	LFPDUX A2, AO2, INC4
	LFPDUX A3, AO, INC4
	LFPDUX A4, AO2, INC4
	add AO, AO, INC4
	LFPDUX A5, AO2, INC4
	add AO, AO, INC4
	LFPDUX A6, AO2, INC4
	subi AO, AO, 16 * SIZE
	subi AO2, AO2, 16 * SIZE
	fxpmul f0, A1, f0
	fxpmul f8, A1, f8
	fxcsnmsub f4, A1, f0, f4
	fxcsnmsub f12, A1, f8, f12
	fxcpnmsub f1, A2, f0, f1
	fxcpnmsub f9, A2, f8, f9
	fxcsnmsub f5, A2, f0, f5
	fxcsnmsub f13, A2, f8, f13
	fxsmul f4, A3, f4
	fxsmul f12, A3, f12
	fxcpnmsub f1, A4, f4, f1
	fxcpnmsub f9, A4, f12, f9
	fxcsnmsub f5, A4, f4, f5
	fxcsnmsub f13, A4, f12, f13
	fxpmul f1, A5, f1
	fxpmul f9, A5, f9
	fxcsnmsub f5, A5, f1, f5
	fxcsnmsub f13, A5, f9, f13
	fxsmul f5, A6, f5
	fxsmul f13, A6, f13
#endif
#ifdef RN
	/*
	 * RN arm: fetch A1..A6 through BO/BO2 stepping by INC4, rewind by
	 * 16 * SIZE; registers are finalised in the order
	 * f0/f1, f4/f5, f8/f9, f12/f13.
	 */
	LFPDUX A1, BO, INC4
	LFPDUX A2, BO2, INC4
	LFPDUX A3, BO, INC4
	LFPDUX A4, BO2, INC4
	add BO, BO, INC4
	LFPDUX A5, BO2, INC4
	add BO, BO, INC4
	LFPDUX A6, BO2, INC4
	subi BO, BO, 16 * SIZE
	subi BO2, BO2, 16 * SIZE
	fxpmul f0, A1, f0
	fxpmul f1, A1, f1
	fxcsnmsub f4, A1, f0, f4
	fxcsnmsub f5, A1, f1, f5
	fxcpnmsub f8, A2, f0, f8
	fxcpnmsub f9, A2, f1, f9
	fxcsnmsub f12, A2, f0, f12
	fxcsnmsub f13, A2, f1, f13
	fxsmul f4, A3, f4
	fxsmul f5, A3, f5
	fxcpnmsub f8, A4, f4, f8
	fxcpnmsub f9, A4, f5, f9
	fxcsnmsub f12, A4, f4, f12
	fxcsnmsub f13, A4, f5, f13
	fxpmul f8, A5, f8
	fxpmul f9, A5, f9
	fxcsnmsub f12, A5, f8, f12
	fxcsnmsub f13, A5, f9, f13
	fxsmul f12, A6, f12
	fxsmul f13, A6, f13
#endif
#ifdef RT
	/*
	 * RT arm: fetch A1..A6 through BO/BO2 starting 20 * SIZE ahead and
	 * stepping by INCM4; registers are finalised in the order
	 * f12/f13, f8/f9, f4/f5, f0/f1.
	 */
	addi BO, BO, 20 * SIZE
	addi BO2, BO2, 20 * SIZE
	LFPDUX A1, BO2, INCM4
	LFPDUX A2, BO, INCM4
	LFPDUX A3, BO2, INCM4
	LFPDUX A4, BO, INCM4
	add BO2, BO2, INCM4
	LFPDUX A5, BO, INCM4
	add BO2, BO2, INCM4
	LFPDUX A6, BO, INCM4
	subi BO, BO, 4 * SIZE
	subi BO2, BO2, 4 * SIZE
	fxsmul f12, A1, f12
	fxsmul f13, A1, f13
	fxcpnmsub f8, A1, f12, f8
	fxcpnmsub f9, A1, f13, f9
	fxcsnmsub f4, A2, f12, f4
	fxcsnmsub f5, A2, f13, f5
	fxcpnmsub f0, A2, f12, f0
	fxcpnmsub f1, A2, f13, f1
	fxpmul f8, A3, f8
	fxpmul f9, A3, f9
	fxcsnmsub f4, A4, f8, f4
	fxcsnmsub f5, A4, f9, f5
	fxcpnmsub f0, A4, f8, f0
	fxcpnmsub f1, A4, f9, f1
	fxsmul f4, A5, f4
	fxsmul f5, A5, f5
	fxcpnmsub f0, A5, f4, f0
	fxcpnmsub f1, A5, f5, f1
	fxpmul f0, A6, f0
	fxpmul f1, A6, f1
#endif
#ifdef LN
	/* LN: step the four C pointers back by 4 * SIZE before storing. */
	subi CO1, CO1, 4 * SIZE
	subi CO2, CO2, 4 * SIZE
	subi CO3, CO3, 4 * SIZE
	subi CO4, CO4, 4 * SIZE
#endif
#if defined(LN) || defined(LT)
	/*
	 * LN/LT: store the eight result registers through BO/BO2, rewind by
	 * 16 * SIZE, then store through CO1..CO4 (STFDUX to CO1/CO3,
	 * STFSDUX to CO2/CO4, four stores per pointer).
	 */
	STFPDUX f0, BO, INC4
	STFPDUX f8, BO2, INC4
	STFPDUX f4, BO, INC4
	STFPDUX f12, BO2, INC4
	STFPDUX f1, BO, INC4
	STFPDUX f9, BO2, INC4
	STFPDUX f5, BO, INC4
	STFPDUX f13, BO2, INC4
	subi BO, BO, 16 * SIZE
	subi BO2, BO2, 16 * SIZE
	STFDUX f0, CO1, INC
	STFDUX f4, CO1, INC
	STFDUX f1, CO1, INC
	STFDUX f5, CO1, INC
	STFSDUX f0, CO2, INC
	STFSDUX f4, CO2, INC
	STFSDUX f1, CO2, INC
	STFSDUX f5, CO2, INC
	STFDUX f8, CO3, INC
	STFDUX f12, CO3, INC
	STFDUX f9, CO3, INC
	STFDUX f13, CO3, INC
	STFSDUX f8, CO4, INC
	STFSDUX f12, CO4, INC
	STFSDUX f9, CO4, INC
	STFSDUX f13, CO4, INC
#else
	/*
	 * RN/RT: store the eight result registers through AO/AO2, rewind by
	 * 16 * SIZE, then store two registers per C pointer, each with an
	 * STFDUX / STFSDUX pair.
	 */
	STFPDUX f0, AO, INC4
	STFPDUX f1, AO2, INC4
	STFPDUX f4, AO, INC4
	STFPDUX f5, AO2, INC4
	STFPDUX f8, AO, INC4
	STFPDUX f9, AO2, INC4
	STFPDUX f12, AO, INC4
	STFPDUX f13, AO2, INC4
	subi AO, AO, 16 * SIZE
	subi AO2, AO2, 16 * SIZE
	STFDUX f0, CO1, INC
	STFSDUX f0, CO1, INC
	STFDUX f1, CO1, INC
	STFSDUX f1, CO1, INC
	STFDUX f4, CO2, INC
	STFSDUX f4, CO2, INC
	STFDUX f5, CO2, INC
	STFSDUX f5, CO2, INC
	STFDUX f8, CO3, INC
	STFSDUX f8, CO3, INC
	STFDUX f9, CO3, INC
	STFSDUX f9, CO3, INC
	STFDUX f12, CO4, INC
	STFSDUX f12, CO4, INC
	STFDUX f13, CO4, INC
	STFSDUX f13, CO4, INC
#endif
#ifdef LN
	/* LN: step the C pointers back by 4 * SIZE again after the stores. */
	subi CO1, CO1, 4 * SIZE
	subi CO2, CO2, 4 * SIZE
	subi CO3, CO3, 4 * SIZE
	subi CO4, CO4, 4 * SIZE
#endif
#ifdef RT
	/* RT: AORIG += K << (2 + BASE_SHIFT). */
	slwi r0, K, 2 + BASE_SHIFT
	add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
	/* LT/RN: advance AO and BO by (K - KK) << (2 + BASE_SHIFT). */
	sub TEMP, K, KK
	slwi r0, TEMP, 2 + BASE_SHIFT
	slwi TEMP, TEMP, 2 + BASE_SHIFT
	add AO, AO, r0
	add BO, BO, TEMP
#endif
#ifdef LT
	addi KK, KK, 4
#endif
#ifdef LN
	subi KK, KK, 4
#endif
	/* Reload f0 from SP + FZERO for the next tile (see the note before .L30). */
	li r0, FZERO
	lfpsx f0, SP, r0
	.align 4

/*
 * .L40: I = M >> 3; branch to .L49 when I <= 0.
 * (.L49 is outside this excerpt.)
 */
.L40:
	srawi. I, M, 3
	ble .L49
	.align 4

/*
 * .L11: start of the setup for the following tile (LT/RN arm shown).  It
 * sets AO2/BO/BO2 as in .L30 and copies f0 into further registers; the
 * rest of this section lies beyond the end of the excerpt.
 */
.L11:
#if defined(LT) || defined(RN)
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, B, - 4 * SIZE
	fpmr f8, f0
	addi BO2, B, - 2 * SIZE
	fpmr f12, f0
	fpmr f5, f0
	fpmr f9, f0
	fpmr f13, f0
	fpmr f2, f0
	fpmr f6, f0
	fpmr f10, f0
	fpmr f14, f0
	fpmr f3, f0
	fpmr f7, f0
	fpmr f11, f0
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -