📄 trsm_kernel_hummer_lt.s
字号:
fxpmul f0, A2, f0 fxpmul f8, A2, f8#endif#ifdef LT LFPDUX A1, AO, INC4 LFPDUX A2, AO2, INC4 subi AO, AO, 4 * SIZE subi AO2, AO2, 4 * SIZE fxpmul f0, A1, f0 fxpmul f8, A1, f8 fxcsnmsub f4, A1, f0, f4 fxcsnmsub f12, A1, f8, f12 fxsmul f4, A2, f4 fxsmul f12, A2, f12#endif#ifdef RN LFPDUX A1, BO, INC4 LFPDUX A2, BO2, INC4 LFPDUX A3, BO, INC4 LFPDUX A4, BO2, INC4 add BO, BO, INC4 LFPDUX A5, BO2, INC4 add BO, BO, INC4 LFPDUX A6, BO2, INC4 subi BO, BO, 16 * SIZE subi BO2, BO2, 16 * SIZE fxpmul f0, A1, f0 fxcsnmsub f4, A1, f0, f4 fxcpnmsub f8, A2, f0, f8 fxcsnmsub f12, A2, f0, f12 fxsmul f4, A3, f4 fxcpnmsub f8, A4, f4, f8 fxcsnmsub f12, A4, f4, f12 fxpmul f8, A5, f8 fxcsnmsub f12, A5, f8, f12 fxsmul f12, A6, f12#endif#ifdef RT addi BO, BO, 20 * SIZE addi BO2, BO2, 20 * SIZE LFPDUX A1, BO2, INCM4 LFPDUX A2, BO, INCM4 LFPDUX A3, BO2, INCM4 LFPDUX A4, BO, INCM4 add BO2, BO2, INCM4 LFPDUX A5, BO, INCM4 add BO2, BO2, INCM4 LFPDUX A6, BO, INCM4 subi BO, BO, 4 * SIZE subi BO2, BO2, 4 * SIZE fxsmul f12, A1, f12 fxcpnmsub f8, A1, f12, f8 fxcsnmsub f4, A2, f12, f4 fxcpnmsub f0, A2, f12, f0 fxpmul f8, A3, f8 fxcsnmsub f4, A4, f8, f4 fxcpnmsub f0, A4, f8, f0 fxsmul f4, A5, f4 fxcpnmsub f0, A5, f4, f0 fxpmul f0, A6, f0#endif#ifdef LN subi CO1, CO1, 2 * SIZE subi CO2, CO2, 2 * SIZE subi CO3, CO3, 2 * SIZE subi CO4, CO4, 2 * SIZE#endif#if defined(LN) || defined(LT) STFPDUX f0, BO, INC4 STFPDUX f8, BO2, INC4 STFPDUX f4, BO, INC4 STFPDUX f12, BO2, INC4 subi BO, BO, 8 * SIZE subi BO2, BO2, 8 * SIZE STFDUX f0, CO1, INC STFDUX f4, CO1, INC STFSDUX f0, CO2, INC STFSDUX f4, CO2, INC STFDUX f8, CO3, INC STFDUX f12, CO3, INC STFSDUX f8, CO4, INC STFSDUX f12, CO4, INC#else STFPDUX f0, AO, INC4 STFPDUX f4, AO2, INC4 STFPDUX f8, AO, INC4 STFPDUX f12, AO2, INC4 subi AO, AO, 8 * SIZE subi AO2, AO2, 8 * SIZE STFDUX f0, CO1, INC STFSDUX f0, CO1, INC STFDUX f4, CO2, INC STFSDUX f4, CO2, INC STFDUX f8, CO3, INC STFSDUX f8, CO3, INC STFDUX f12, CO4, INC STFSDUX f12, CO4, INC#endif#ifdef LN subi CO1, CO1, 2 * SIZE subi CO2, CO2, 2 * SIZE subi CO3, CO3, 2 * SIZE subi CO4, CO4, 2 * SIZE#endif#ifdef RT slwi r0, K, 1 + BASE_SHIFT add AORIG, AORIG, r0#endif#if defined(LT) || defined(RN) sub TEMP, K, KK slwi r0, TEMP, 1 + BASE_SHIFT slwi TEMP, TEMP, 2 + BASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LT addi KK, KK, 2#endif#ifdef LN subi KK, KK, 2#endif li r0, FZERO lfpsx f0, SP, r0 .align 4.L40: andi. I, M, 1 beq .L49#if defined(LT) || defined(RN) addi AO2, AO, 2 * SIZE fpmr f1, f0 addi BO, B, - 4 * SIZE fpmr f2, f0 addi BO2, B, - 2 * SIZE fpmr f3, f0 srawi. r0, KK, 3 mtspr CTR, r0 ble .L44#else#ifdef LN slwi r0, K, 0 + BASE_SHIFT sub AORIG, AORIG, r0#endif slwi r0 , KK, 0 + BASE_SHIFT slwi TEMP, KK, 2 + BASE_SHIFT add AO, AORIG, r0 add BO, B, TEMP sub TEMP, K, KK addi AO2, AO, 2 * SIZE fpmr f1, f0 addi BO, BO, - 4 * SIZE fpmr f2, f0 addi BO2, BO, 2 * SIZE fpmr f3, f0 srawi. r0, TEMP, 3 mtspr CTR, r0 ble .L44#endif LFPDUX A1, AO, INC4 LFPDUX B1, BO, INC4 LFPDUX B2, BO2, INC4 LFPDUX A2, AO2, INC4 LFPDUX B3, BO, INC4 LFPDUX B4, BO2, INC4 LFPDUX A3, AO, INC4 LFPDUX A5, BO, INC4 LFPDUX A6, BO2, INC4 LFPDUX A4, AO2, INC4 LFPDUX A7, BO, INC4 LFPDUX A8, BO2, INC4 bdz- .L43 .align 4.L42: fxcpmadd f0, A1, B1, f0 LFPDUX B1, BO, INC4 fxcpmadd f1, A1, B2, f1 LFPDUX B2, BO2, INC4 fxcsmadd f2, A1, B3, f2 LFPDUX B3, BO, INC4 fxcsmadd f3, A1, B4, f3 LFPDUX B4, BO2, INC4 LFPDUX A1, AO, INC4 fxcpmadd f0, A2, A5, f0 LFPDUX A5, BO, INC4 fxcpmadd f1, A2, A6, f1 LFPDUX A6, BO2, INC4 fxcsmadd f2, A2, A7, f2 LFPDUX A7, BO, INC4 fxcsmadd f3, A2, A8, f3 LFPDUX A8, BO2, INC4 LFPDUX A2, AO2, INC4 fxcpmadd f0, A3, B1, f0 LFPDUX B1, BO, INC4 fxcpmadd f1, A3, B2, f1 LFPDUX B2, BO2, INC4 fxcsmadd f2, A3, B3, f2 LFPDUX B3, BO, INC4 fxcsmadd f3, A3, B4, f3 LFPDUX B4, BO2, INC4 LFPDUX A3, AO, INC4 fxcpmadd f0, A4, A5, f0 LFPDUX A5, BO, INC4 fxcpmadd f1, A4, A6, f1 LFPDUX A6, BO2, INC4 fxcsmadd f2, A4, A7, f2 LFPDUX A7, BO, INC4 fxcsmadd f3, A4, A8, f3 LFPDUX A8, BO2, INC4 LFPDUX A4, AO2, INC4 bdnz+ .L42 .align 4.L43: fxcpmadd f0, A1, B1, f0 LFPDUX B1, BO, INC4 fxcpmadd f1, A1, B2, f1 LFPDUX B2, BO2, INC4 fxcsmadd f2, A1, B3, f2 LFPDUX B3, BO, INC4 fxcsmadd f3, A1, B4, f3 LFPDUX B4, BO2, INC4 fxcpmadd f0, A2, A5, f0 LFPDUX A5, BO, INC4 fxcpmadd f1, A2, A6, f1 LFPDUX A6, BO2, INC4 fxcsmadd f2, A2, A7, f2 LFPDUX A7, BO, INC4 fxcsmadd f3, A2, A8, f3 LFPDUX A8, BO2, INC4 fxcpmadd f0, A3, B1, f0 fxcpmadd f1, A3, B2, f1 fxcsmadd f2, A3, B3, f2 fxcsmadd f3, A3, B4, f3 fxcpmadd f0, A4, A5, f0 fxcpmadd f1, A4, A6, f1 fxcsmadd f2, A4, A7, f2 fxcsmadd f3, A4, A8, f3 .align 4.L44:#if defined(LT) || defined(RN) andi. r0, KK, 7 mtspr CTR, r0 ble+ .L48#else andi. r0, TEMP, 7 mtspr CTR, r0 ble+ .L48#endif LFDX A1, AO, INC4 LFPDUX B1, BO, INC4 LFPDUX B2, BO2, INC4 add AO, AO, INC bdz- .L47 .align 4.L46: fxcpmadd f0, A1, B1, f0 LFPDUX B1, BO, INC4 fxcpmadd f1, A1, B2, f1 LFDX A1, AO, INC4 LFPDUX B2, BO2, INC4 add AO, AO, INC bdnz+ .L46 .align 4.L47: fxcpmadd f0, A1, B1, f0 fxcpmadd f1, A1, B2, f1 addi AO2, AO, 2 * SIZE .align 4.L48: fpadd f0, f0, f2 fpadd f1, f1, f3#if defined(LN) || defined(RT)#ifdef LN subi r0, KK, 1#else subi r0, KK, 4#endif slwi TEMP, r0, 0 + BASE_SHIFT slwi r0, r0, 2 + BASE_SHIFT add AO, AORIG, TEMP add BO, B, r0 addi AO2, AO, 2 * SIZE addi BO, BO, - 4 * SIZE addi BO2, BO, 2 * SIZE#endif#if defined(LN) || defined(LT) LFPDX f16, BO, INC4 LFPDX f17, BO2, INC4 fpsub f0, f16, f0 fpsub f1, f17, f1#else LFPDX f16, AO, INC4 LFPDX f17, AO2, INC4 fpsub f0, f16, f0 fpsub f1, f17, f1#endif#if defined(LN) || defined(LT) LFPDX A1, AO, INC4 fxpmul f0, A1, f0 fxpmul f1, A1, f1#endif#ifdef RN LFD A1, (4 + 0) * SIZE(BO) LFD A2, (4 + 1) * SIZE(BO) LFD A3, (4 + 2) * SIZE(BO) LFD A4, (4 + 3) * SIZE(BO) LFD A5, (4 + 5) * SIZE(BO) LFD A6, (4 + 6) * SIZE(BO) LFD A7, (4 + 7) * SIZE(BO) LFD A8, (4 + 10) * SIZE(BO) LFD A9, (4 + 11) * SIZE(BO) LFD A10, (4 + 15) * SIZE(BO) fsmtp f2, f0 fsmtp f3, f1 fmul f0, A1, f0 fnmsub f2, A2, f0, f2 fnmsub f1, A3, f0, f1 fnmsub f3, A4, f0, f3 fmul f2, A5, f2 fnmsub f1, A6, f2, f1 fnmsub f3, A7, f2, f3 fmul f1, A8, f1 fnmsub f3, A9, f1, f3 fmul f3, A10, f3 fsmfp f0, f2 fsmfp f1, f3#endif#ifdef RT LFD A1, (4 + 15) * SIZE(BO) LFD A2, (4 + 14) * SIZE(BO) LFD A3, (4 + 13) * SIZE(BO) LFD A4, (4 + 12) * SIZE(BO) LFD A5, (4 + 10) * SIZE(BO) LFD A6, (4 + 9) * SIZE(BO) LFD A7, (4 + 8) * SIZE(BO) LFD A8, (4 + 5) * SIZE(BO) LFD A9, (4 + 4) * SIZE(BO) LFD A10, (4 + 0) * SIZE(BO) fsmtp f2, f0 fsmtp f3, f1 fmul f3, A1, f3 fnmsub f1, A2, f3, f1 fnmsub f2, A3, f3, f2 fnmsub f0, A4, f3, f0 fmul f1, A5, f1 fnmsub f2, A6, f1, f2 fnmsub f0, A7, f1, f0 fmul f2, A8, f2 fnmsub f0, A9, f2, f0 fmul f0, A10, f0 fsmfp f0, f2 fsmfp f1, f3#endif#if defined(LN) || defined(LT) STFPDX f0, BO, INC4 STFPDX f1, BO2, INC4#else STFPDX f0, AO, INC4 STFPDX f1, AO2, INC4#endif#ifdef LN subi CO1, CO1, 1 * SIZE subi CO2, CO2, 1 * SIZE subi CO3, CO3, 1 * SIZE subi CO4, CO4, 1 * SIZE#endif STFDUX f0, CO1, INC STFSDUX f0, CO2, INC STFDUX f1, CO3, INC STFSDUX f1, CO4, INC#ifdef LN subi CO1, CO1, 1 * SIZE subi CO2, CO2, 1 * SIZE subi CO3, CO3, 1 * SIZE subi CO4, CO4, 1 * SIZE#endif#ifdef RT slwi r0, K, 0 + BASE_SHIFT add AORIG, AORIG, r0#endif#if defined(LT) || defined(RN) sub TEMP, K, KK slwi r0, TEMP, 0 + BASE_SHIFT slwi TEMP, TEMP, 2 + BASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LT addi KK, KK, 1#endif#ifdef LN subi KK, KK, 1#endif .align 4.L49:#ifdef LN slwi r0, K, 2 + BASE_SHIFT add B, B, r0#endif#if defined(LT) || defined(RN) addi B, BO, 4 * SIZE#endif#ifdef RN addi KK, KK, 4#endif#ifdef RT subi KK, KK, 4#endif addic. J, J, -1 bgt+ .L10 .align 4.L50: andi. J, N, 2 beq .L90#ifdef RT slwi r0, K, 1 + BASE_SHIFT sub B, B, r0 slwi r0, LDC, 1 sub C, C, r0#endif mr CO1, C add CO2, C, LDC#ifdef LN add KK, M, OFFSET#endif#ifdef LT mr KK, OFFSET#endif#if defined(LN) || defined(RT) addi AORIG, A, -2 * SIZE#else addi AO, A, -2 * SIZE#endif#ifndef RT add C, CO2, LDC#endif li r0, FZERO lfpsx f0, SP, r0 srawi. I, M, 3 ble .L60 .align 4.L51:#if defined(LT) || defined(RN) fpmr f4, f0 addi BO, B, - 2 * SIZE fpmr f1, f0 fpmr f5, f0 fpmr f2, f0 fpmr f6, f0 srawi. r0, KK, 2 fpmr f3, f0 mtspr CTR, r0 fpmr f7, f0 ble .L54#else#ifdef LN slwi r0, K, 3 + BASE_SHIFT sub AORIG, AORIG, r0#endif slwi r0 , KK, 3 + BASE_SHIFT slwi TEMP, KK, 1 + BASE_SHIFT add AO, AORIG, r0 add BO, B, TEMP sub TEMP, K, KK fpmr f4, f0 addi BO, BO, - 2 * SIZE fpmr f1, f0 fpmr f5, f0 fpmr f2, f0 fpmr f6, f0 srawi. r0, TEMP, 2 fpmr f3, f0 mtspr CTR, r0 fpmr f7, f0 ble .L54#endif LFPDUX B1, BO, INC2 LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX A5, AO, INC2 LFPDUX A6, AO, INC2 LFPDUX A7, AO, INC2 LFPDUX A8, AO, INC2 bdz- .L53 .align 4.L52: fxcpmadd f0, B1, A1, f0 LFPDUX B4, BO, INC2 fxcsmadd f4, B1, A1, f4 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 nop fxcsmadd f5, B1, A2, f5 LFPDUX A2, AO, INC2 fxcpmadd f2, B1, A3, f2 nop fxcsmadd f6, B1, A3, f6 LFPDUX A3, AO, INC2 fxcpmadd f3, B1, A4, f3 nop fxcsmadd f7, B1, A4, f7 LFPDUX A4, AO, INC2 fxcpmadd f0, B2, A5, f0 LFPDUX B1, BO, INC2 fxcsmadd f4, B2, A5, f4 LFPDUX A5, AO, INC2 fxcpmadd f1, B2, A6, f1 nop fxcsmadd f5, B2, A6, f5 LFPDUX A6, AO, INC2 fxcpmadd f2, B2, A7, f2 nop fxcsmadd f6, B2, A7, f6 LFPDUX A7, AO, INC2 fxcpmadd f3, B2, A8, f3 nop fxcsmadd f7, B2, A8, f7 LFPDUX A8, AO, INC2 fxcpmadd f0, B3, A1, f0 LFPDUX B2, BO, INC2 fxcsmadd f4, B3, A1, f4 LFPDUX A1, AO, INC2 fxcpmadd f1, B3, A2, f1 nop fxcsmadd f5, B3, A2, f5 LFPDUX A2, AO, INC2 fxcpmadd f2, B3, A3, f2 nop fxcsmadd f6, B3, A3, f6 LFPDUX A3, AO, INC2 fxcpmadd f3, B3, A4, f3 nop fxcsmadd f7, B3, A4, f7 LFPDUX A4, AO, INC2 fxcpmadd f0, B4, A5, f0 LFPDUX B3, BO, INC2 fxcsmadd f4, B4, A5, f4 LFPDUX A5, AO, INC2 fxcpmadd f1, B4, A6, f1 nop fxcsmadd f5, B4, A6, f5 LFPDUX A6, AO, INC2 fxcpmadd f2, B4, A7, f2 nop fxcsmadd f6, B4, A7, f6 LFPDUX A7, AO, INC2 fxcpmadd f3, B4, A8, f3 nop fxcsmadd f7, B4, A8, f7 LFPDUX A8, AO, INC2 bdnz+ .L52 .align 4.L53: fxcpmadd f0, B1, A1, f0 LFPDUX B4, BO, INC2 fxcsmadd f4, B1, A1, f4 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 nop fxcsmadd f5, B1, A2, f5 LFPDUX A2, AO, INC2 fxcpmadd f2, B1, A3, f2 nop fxcsmadd f6, B1, A3, f6 LFPDUX A3, AO, INC2 fxcpmadd f3, B1, A4, f3 nop fxcsmadd f7, B1, A4, f7 LFPDUX A4, AO, INC2 fxcpmadd f0, B2, A5, f0 nop fxcsmadd f4, B2, A5, f4 LFPDUX A5, AO, INC2 fxcpmadd f1, B2, A6, f1 nop fxcsmadd f5, B2, A6, f5 LFPDUX A6, AO, INC2 fxcpmadd f2, B2, A7, f2 nop fxcsmadd f6, B2, A7, f6 LFPDUX A7, AO, INC2 fxcpmadd f3, B2, A8, f3 nop fxcsmadd f7, B2, A8, f7 LFPDUX A8, AO, INC2 fxcpmadd f0, B3, A1, f0 fxcsmadd f4, B3, A1, f4 fxcpmadd f1, B3, A2, f1 fxcsmadd f5, B3, A2, f5 fxcpmadd f2, B3, A3, f2 fxcsmadd f6, B3, A3, f6 fxcpmadd f3, B3, A4, f3 fxcsmadd f7, B3, A4, f7 fxcpmadd f0, B4, A5, f0 fxcsmadd f4, B4, A5, f4 fxcpmadd f1, B4, A6, f1 fxcsmadd f5, B4, A6, f5 fxcpmadd f2, B4, A7, f2 fxcsmadd f6, B4, A7, f6 fxcpmadd f3, B4, A8, f3 fxcsmadd f7, B4, A8, f7 .align 4.L54:#if defined(LT) || defined(RN) andi. r0, KK, 3 mtspr CTR, r0 ble+ .L58#else andi. r0, TEMP, 3 mtspr CTR, r0 ble+ .L58#endif LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 LFPDUX A2, AO, INC2 LFPDUX A3, AO, INC2
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -