📄 trsm_kernel_hummer_ln.s
字号:
fxcpnmsub f9, A2, f8, f9 fxcsnmsub f5, A2, f0, f5 fxcsnmsub f13, A2, f8, f13 fxcpnmsub f2, A3, f0, f2 fxcpnmsub f10, A3, f8, f10 fxcsnmsub f6, A3, f0, f6 fxcsnmsub f14, A3, f8, f14 fxcpnmsub f3, A4, f0, f3 fxcpnmsub f11, A4, f8, f11 fxcsnmsub f7, A4, f0, f7 fxcsnmsub f15, A4, f8, f15 fxsmul f4, A5, f4 fxsmul f12, A5, f12 fxcpnmsub f1, A6, f4, f1 fxcpnmsub f9, A6, f12, f9 fxcsnmsub f5, A6, f4, f5 fxcsnmsub f13, A6, f12, f13 fxcpnmsub f2, A7, f4, f2 fxcpnmsub f10, A7, f12, f10 fxcsnmsub f6, A7, f4, f6 fxcsnmsub f14, A7, f12, f14 fxcpnmsub f3, A8, f4, f3 fxcpnmsub f11, A8, f12, f11 fxcsnmsub f7, A8, f4, f7 fxcsnmsub f15, A8, f12, f15 add AO, AO, INC4 LFPDUX A1, AO2, INC4 LFPDUX A2, AO, INC4 LFPDUX A3, AO2, INC4 add AO, AO, INC4 LFPDUX A4, AO2, INC4 LFPDUX A5, AO, INC4 LFPDUX A6, AO2, INC4 add AO, AO, INC4 add AO2, AO2, INC4 LFPDUX A7, AO, INC4 LFPDUX A8, AO2, INC4 fxpmul f1, A1, f1 fxpmul f9, A1, f9 fxcsnmsub f5, A1, f1, f5 fxcsnmsub f13, A1, f9, f13 fxcpnmsub f2, A2, f1, f2 fxcpnmsub f10, A2, f9, f10 fxcsnmsub f6, A2, f1, f6 fxcsnmsub f14, A2, f9, f14 fxcpnmsub f3, A3, f1, f3 fxcpnmsub f11, A3, f9, f11 fxcsnmsub f7, A3, f1, f7 fxcsnmsub f15, A3, f9, f15 fxsmul f5, A4, f5 fxsmul f13, A4, f13 fxcpnmsub f2, A5, f5, f2 fxcpnmsub f10, A5, f13, f10 fxcsnmsub f6, A5, f5, f6 fxcsnmsub f14, A5, f13, f14 fxcpnmsub f3, A6, f5, f3 fxcpnmsub f11, A6, f13, f11 fxcsnmsub f7, A6, f5, f7 fxcsnmsub f15, A6, f13, f15 fxpmul f2, A7, f2 fxpmul f10, A7, f10 fxcsnmsub f6, A7, f2, f6 fxcsnmsub f14, A7, f10, f14 fxcpnmsub f3, A8, f2, f3 fxcpnmsub f11, A8, f10, f11 fxcsnmsub f7, A8, f2, f7 fxcsnmsub f15, A8, f10, f15 add AO, AO, INC4 add AO2, AO2, INC4 LFPDUX A1, AO, INC4 LFPDUX A2, AO2, INC4 addi AO, AO, 8 * SIZE addi AO2, AO2, 4 * SIZE LFPDUX A3, AO2, INC4 addi AO, AO, 8 * SIZE addi AO2, AO2, 4 * SIZE LFPDUX A4, AO2, INC4 subi AO, AO, 64 * SIZE subi AO2, AO2, 64 * SIZE fxsmul f6, A1, f6 fxsmul f14, A1, f14 fxcpnmsub f3, A2, f6, f3 fxcpnmsub f11, A2, f14, f11 fxcsnmsub f7, A2, f6, f7 fxcsnmsub f15, A2, f14, f15 fxpmul f3, A3, f3 fxpmul f11, A3, f11 fxcsnmsub f7, A3, f3, f7 fxcsnmsub f15, A3, f11, f15 fxsmul f7, A4, f7 fxsmul f15, A4, f15#endif#ifdef RN LFPDUX A1, BO, INC4 LFPDUX A2, BO2, INC4 LFPDUX A3, BO, INC4 LFPDUX A4, BO2, INC4 add BO, BO, INC4 LFPDUX A5, BO2, INC4 add BO, BO, INC4 LFPDUX A6, BO2, INC4 subi BO, BO, 16 * SIZE subi BO2, BO2, 16 * SIZE fxpmul f0, A1, f0 fxpmul f1, A1, f1 fxpmul f2, A1, f2 fxpmul f3, A1, f3 fxcsnmsub f4, A1, f0, f4 fxcsnmsub f5, A1, f1, f5 fxcsnmsub f6, A1, f2, f6 fxcsnmsub f7, A1, f3, f7 fxcpnmsub f8, A2, f0, f8 fxcpnmsub f9, A2, f1, f9 fxcpnmsub f10, A2, f2, f10 fxcpnmsub f11, A2, f3, f11 fxcsnmsub f12, A2, f0, f12 fxcsnmsub f13, A2, f1, f13 fxcsnmsub f14, A2, f2, f14 fxcsnmsub f15, A2, f3, f15 fxsmul f4, A3, f4 fxsmul f5, A3, f5 fxsmul f6, A3, f6 fxsmul f7, A3, f7 fxcpnmsub f8, A4, f4, f8 fxcpnmsub f9, A4, f5, f9 fxcpnmsub f10, A4, f6, f10 fxcpnmsub f11, A4, f7, f11 fxcsnmsub f12, A4, f4, f12 fxcsnmsub f13, A4, f5, f13 fxcsnmsub f14, A4, f6, f14 fxcsnmsub f15, A4, f7, f15 fxpmul f8, A5, f8 fxpmul f9, A5, f9 fxpmul f10, A5, f10 fxpmul f11, A5, f11 fxcsnmsub f12, A5, f8, f12 fxcsnmsub f13, A5, f9, f13 fxcsnmsub f14, A5, f10, f14 fxcsnmsub f15, A5, f11, f15 fxsmul f12, A6, f12 fxsmul f13, A6, f13 fxsmul f14, A6, f14 fxsmul f15, A6, f15#endif#ifdef RT addi BO, BO, 20 * SIZE addi BO2, BO2, 20 * SIZE LFPDUX A1, BO2, INCM4 LFPDUX A2, BO, INCM4 LFPDUX A3, BO2, INCM4 LFPDUX A4, BO, INCM4 add BO2, BO2, INCM4 LFPDUX A5, BO, INCM4 add BO2, BO2, INCM4 LFPDUX A6, BO, INCM4 subi BO, BO, 4 * SIZE subi BO2, BO2, 4 * SIZE fxsmul f12, A1, f12 fxsmul f13, A1, f13 fxsmul f14, A1, f14 fxsmul f15, A1, f15 fxcpnmsub f8, A1, f12, f8 fxcpnmsub f9, A1, f13, f9 fxcpnmsub f10, A1, f14, f10 fxcpnmsub f11, A1, f15, f11 fxcsnmsub f4, A2, f12, f4 fxcsnmsub f5, A2, f13, f5 fxcsnmsub f6, A2, f14, f6 fxcsnmsub f7, A2, f15, f7 fxcpnmsub f0, A2, f12, f0 fxcpnmsub f1, A2, f13, f1 fxcpnmsub f2, A2, f14, f2 fxcpnmsub f3, A2, f15, f3 fxpmul f8, A3, f8 fxpmul f9, A3, f9 fxpmul f10, A3, f10 fxpmul f11, A3, f11 fxcsnmsub f4, A4, f8, f4 fxcsnmsub f5, A4, f9, f5 fxcsnmsub f6, A4, f10, f6 fxcsnmsub f7, A4, f11, f7 fxcpnmsub f0, A4, f8, f0 fxcpnmsub f1, A4, f9, f1 fxcpnmsub f2, A4, f10, f2 fxcpnmsub f3, A4, f11, f3 fxsmul f4, A5, f4 fxsmul f5, A5, f5 fxsmul f6, A5, f6 fxsmul f7, A5, f7 fxcpnmsub f0, A5, f4, f0 fxcpnmsub f1, A5, f5, f1 fxcpnmsub f2, A5, f6, f2 fxcpnmsub f3, A5, f7, f3 fxpmul f0, A6, f0 fxpmul f1, A6, f1 fxpmul f2, A6, f2 fxpmul f3, A6, f3#endif#ifdef LN subi CO1, CO1, 8 * SIZE subi CO2, CO2, 8 * SIZE subi CO3, CO3, 8 * SIZE subi CO4, CO4, 8 * SIZE#endif#if defined(LN) || defined(LT) STFPDUX f0, BO, INC4 STFPDUX f8, BO2, INC4 STFPDUX f4, BO, INC4 STFPDUX f12, BO2, INC4 STFPDUX f1, BO, INC4 STFPDUX f9, BO2, INC4 STFPDUX f5, BO, INC4 STFPDUX f13, BO2, INC4 STFPDUX f2, BO, INC4 STFPDUX f10, BO2, INC4 STFPDUX f6, BO, INC4 STFPDUX f14, BO2, INC4 STFPDUX f3, BO, INC4 STFPDUX f11, BO2, INC4 STFPDUX f7, BO, INC4 STFPDUX f15, BO2, INC4 subi BO, BO, 32 * SIZE subi BO2, BO2, 32 * SIZE STFDUX f0, CO1, INC STFDUX f4, CO1, INC STFDUX f1, CO1, INC STFDUX f5, CO1, INC STFDUX f2, CO1, INC STFDUX f6, CO1, INC STFDUX f3, CO1, INC STFDUX f7, CO1, INC STFSDUX f0, CO2, INC STFSDUX f4, CO2, INC STFSDUX f1, CO2, INC STFSDUX f5, CO2, INC STFSDUX f2, CO2, INC STFSDUX f6, CO2, INC STFSDUX f3, CO2, INC STFSDUX f7, CO2, INC STFDUX f8, CO3, INC STFDUX f12, CO3, INC STFDUX f9, CO3, INC STFDUX f13, CO3, INC STFDUX f10, CO3, INC STFDUX f14, CO3, INC STFDUX f11, CO3, INC STFDUX f15, CO3, INC STFSDUX f8, CO4, INC STFSDUX f12, CO4, INC STFSDUX f9, CO4, INC STFSDUX f13, CO4, INC STFSDUX f10, CO4, INC STFSDUX f14, CO4, INC STFSDUX f11, CO4, INC STFSDUX f15, CO4, INC#else STFPDUX f0, AO, INC4 STFPDUX f1, AO2, INC4 STFPDUX f2, AO, INC4 STFPDUX f3, AO2, INC4 STFPDUX f4, AO, INC4 STFPDUX f5, AO2, INC4 STFPDUX f6, AO, INC4 STFPDUX f7, AO2, INC4 STFPDUX f8, AO, INC4 STFPDUX f9, AO2, INC4 STFPDUX f10, AO, INC4 STFPDUX f11, AO2, INC4 STFPDUX f12, AO, INC4 STFPDUX f13, AO2, INC4 STFPDUX f14, AO, INC4 STFPDUX f15, AO2, INC4 subi AO, AO, 32 * SIZE subi AO2, AO2, 32 * SIZE STFDUX f0, CO1, INC STFSDUX f0, CO1, INC STFDUX f1, CO1, INC STFSDUX f1, CO1, INC STFDUX f2, CO1, INC STFSDUX f2, CO1, INC STFDUX f3, CO1, INC STFSDUX f3, CO1, INC STFDUX f4, CO2, INC STFSDUX f4, CO2, INC STFDUX f5, CO2, INC STFSDUX f5, CO2, INC STFDUX f6, CO2, INC STFSDUX f6, CO2, INC STFDUX f7, CO2, INC STFSDUX f7, CO2, INC STFDUX f8, CO3, INC STFSDUX f8, CO3, INC STFDUX f9, CO3, INC STFSDUX f9, CO3, INC STFDUX f10, CO3, INC STFSDUX f10, CO3, INC STFDUX f11, CO3, INC STFSDUX f11, CO3, INC STFDUX f12, CO4, INC STFSDUX f12, CO4, INC STFDUX f13, CO4, INC STFSDUX f13, CO4, INC STFDUX f14, CO4, INC STFSDUX f14, CO4, INC STFDUX f15, CO4, INC STFSDUX f15, CO4, INC#endif#ifdef LN subi CO1, CO1, 8 * SIZE subi CO2, CO2, 8 * SIZE subi CO3, CO3, 8 * SIZE subi CO4, CO4, 8 * SIZE#endif#ifdef RT slwi r0, K, 3 + BASE_SHIFT add AORIG, AORIG, r0#endif#if defined(LT) || defined(RN) sub TEMP, K, KK slwi r0, TEMP, 3 + BASE_SHIFT slwi TEMP, TEMP, 2 + BASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LT addi KK, KK, 8#endif#ifdef LN subi KK, KK, 8#endif addic. I, I, -1 li r0, FZERO lfpsx f0, SP, r0 bgt+ .L11 .align 4.L49:#ifdef LN slwi r0, K, 2 + BASE_SHIFT add B, B, r0#endif#if defined(LT) || defined(RN) addi B, BO, 4 * SIZE#endif#ifdef RN addi KK, KK, 4#endif#ifdef RT subi KK, KK, 4#endif addic. J, J, -1 bgt+ .L10 .align 4.L50: andi. J, N, 2 beq .L90#ifdef RT slwi r0, K, 1 + BASE_SHIFT sub B, B, r0 slwi r0, LDC, 1 sub C, C, r0#endif mr CO1, C add CO2, C, LDC#ifdef LN add KK, M, OFFSET#endif#ifdef LT mr KK, OFFSET#endif#if defined(LN) || defined(RT) addi AORIG, A, -2 * SIZE#else addi AO, A, -2 * SIZE#endif#ifndef RT add C, CO2, LDC#endif li r0, FZERO lfpsx f0, SP, r0 andi. I, M, 1 beq .L60#if defined(LT) || defined(RN) addi BO, B, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, KK, 3 mtspr CTR, r0 ble .L84#else#ifdef LN slwi r0, K, 0 + BASE_SHIFT sub AORIG, AORIG, r0#endif slwi r0 , KK, 0 + BASE_SHIFT slwi TEMP, KK, 1 + BASE_SHIFT add AO, AORIG, r0 add BO, B, TEMP sub TEMP, K, KK addi BO, BO, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, TEMP, 3 mtspr CTR, r0 ble .L84#endif LFPDUX B1, BO, INC2 LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX B4, BO, INC2 bdz- .L83 .align 4.L82: fxcpmadd f0, A1, B1, f0 LFPDUX B1, BO, INC2 fxcsmadd f1, A1, B2, f1 LFPDUX B2, BO, INC2 LFPDUX A1, AO, INC2 fxcpmadd f2, A2, B3, f2 LFPDUX B3, BO, INC2 fxcsmadd f3, A2, B4, f3 LFPDUX B4, BO, INC2 LFPDUX A2, AO, INC2 fxcpmadd f0, A3, B1, f0 LFPDUX B1, BO, INC2 fxcsmadd f1, A3, B2, f1 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 fxcpmadd f2, A4, B3, f2 LFPDUX B3, BO, INC2 fxcsmadd f3, A4, B4, f3 LFPDUX B4, BO, INC2 LFPDUX A4, AO, INC2 bdnz+ .L82 .align 4.L83: fxcpmadd f0, A1, B1, f0 LFPDUX B1, BO, INC2 fxcsmadd f1, A1, B2, f1 LFPDUX B2, BO, INC2 fxcpmadd f2, A2, B3, f2 LFPDUX B3, BO, INC2 fxcsmadd f3, A2, B4, f3 LFPDUX B4, BO, INC2 fxcpmadd f0, A3, B1, f0 fxcsmadd f1, A3, B2, f1 fxcpmadd f2, A4, B3, f2 fxcsmadd f3, A4, B4, f3 .align 4.L84:#if defined(LT) || defined(RN) andi. r0, KK, 7 mtspr CTR, r0 ble+ .L88#else andi. r0, TEMP, 7 mtspr CTR, r0 ble+ .L88#endif LFDX A1, AO, INC2 LFPDUX B1, BO, INC2 add AO, AO, INC bdz- .L87 .align 4.L86: fxcpmadd f0, A1, B1, f0 LFDX A1, AO, INC2 LFPDUX B1, BO, INC2 add AO, AO, INC bdnz+ .L86 .align 4.L87: fxcpmadd f0, A1, B1, f0 .align 4.L88: fpadd f0, f0, f1 fpadd f2, f2, f3 fpadd f0, f0, f2#if defined(LN) || defined(RT)#ifdef LN subi r0, KK, 1#else subi r0, KK, 2#endif slwi TEMP, r0, 0 + BASE_SHIFT slwi r0, r0, 1 + BASE_SHIFT add AO, AORIG, TEMP add BO, B, r0 addi BO, BO, - 2 * SIZE#endif#if defined(LN) || defined(LT) LFPDX f16, BO, INC2 fpsub f0, f16, f0#else LFPDX f16, AO, INC2 fpsub f0, f16, f0#endif#ifdef LN LFPDX A1, AO, INC2 fxpmul f0, A1, f0#endif#ifdef LT LFPDX A1, AO, INC2 fxpmul f0, A1, f0#endif#ifdef RN LFD A1, (2 + 0) * SIZE(BO) LFD A2, (2 + 1) * SIZE(BO) LFD A3, (2 + 3) * SIZE(BO) fsmtp f1, f0 fmul f0, A1, f0 fnmsub f1, A2, f0, f1 fmul f1, A3, f1 fsmfp f0, f1#endif#ifdef RT LFD A1, (2 + 3) * SIZE(BO) LFD A2, (2 + 2) * SIZE(BO) LFD A3, (2 + 0) * SIZE(BO) fsmtp f1, f0 fmul f1, A1, f1 fnmsub f0, A2, f1, f0 fmul f0, A3, f0 fsmfp f0, f1#endif#ifdef LN subi CO1, CO1, 1 * SIZE subi CO2, CO2, 1 * SIZE#endif#if defined(LN) || defined(LT) STFPDX f0, BO, INC2 STFDUX f0, CO1, INC STFSDUX f0, CO2, INC#else STFPDX f0, AO, INC2 STFDUX f0, CO1, INC STFDUX f1, CO2, INC#endif#ifdef LN subi CO1, CO1, 1 * SIZE subi CO2, CO2, 1 * SIZE#endif#ifdef RT slwi r0, K, 0 + BASE_SHIFT add AORIG, AORIG, r0#endif#if defined(LT) || defined(RN) sub TEMP, K, KK slwi r0, TEMP, 0 + BASE_SHIFT slwi TEMP, TEMP, 1 + BASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LT addi KK, KK, 1#endif#ifdef LN subi KK, KK, 1#endif li r0, FZERO lfpsx f0, SP, r0 .align 4.L60: andi. I, M, 2 beq .L70#if defined(LT) || defined(RN) addi BO, B, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, KK, 3 mtspr CTR, r0 ble .L74#else#ifdef LN slwi r0, K, 1 + BASE_SHIFT sub AORIG, AORIG, r0#endif slwi r0 , KK, 1 + BASE_SHIFT slwi TEMP, KK, 1 + BASE_SHIFT add AO, AORIG, r0 add BO, B, TEMP sub TEMP, K, KK addi BO, BO, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, TEMP, 3 mtspr CTR, r0 ble .L74#endif LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX A4, AO, INC2 LFPDUX B4, BO, INC2 LFPDUX A5, AO, INC2
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -