📄 trsm_kernel_hummer_rt.s
字号:
fxcpmadd f1, B4, A6, f1 LFPDUX A6, AO, INC2 fxcsmadd f2, B4, A7, f2 LFPDUX A7, AO, INC2 fxcsmadd f3, B4, A8, f3 LFPDUX A8, AO, INC2 LFPDUX B4, BO, INC2 bdnz+ .L102 .align 4.L103: fxcpmadd f0, B1, A1, f0 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 LFPDUX A2, AO, INC2 fxcsmadd f2, B1, A3, f2 LFPDUX A3, AO, INC2 fxcsmadd f3, B1, A4, f3 LFPDUX A4, AO, INC2 fxcpmadd f0, B2, A5, f0 LFPDUX A5, AO, INC2 fxcpmadd f1, B2, A6, f1 LFPDUX A6, AO, INC2 fxcsmadd f2, B2, A7, f2 LFPDUX A7, AO, INC2 fxcsmadd f3, B2, A8, f3 LFPDUX A8, AO, INC2 fxcpmadd f0, B3, A1, f0 fxcpmadd f1, B3, A2, f1 fxcsmadd f2, B3, A3, f2 fxcsmadd f3, B3, A4, f3 fxcpmadd f0, B4, A5, f0 fxcpmadd f1, B4, A6, f1 fxcsmadd f2, B4, A7, f2 fxcsmadd f3, B4, A8, f3 .align 4.L104:#if defined(LT) || defined(RN) andi. r0, KK, 7 mtspr CTR, r0 ble+ .L108#else andi. r0, TEMP, 7 mtspr CTR, r0 ble+ .L108#endif LFPDUX A1, AO, INC2 LFDX B1, BO, INC2 LFPDUX A2, AO, INC2 add BO, BO, INC bdz- .L107 .align 4.L106: fxcpmadd f0, B1, A1, f0 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 LFDX B1, BO, INC2 LFPDUX A2, AO, INC2 add BO, BO, INC bdnz+ .L106 .align 4.L107: fxcpmadd f0, B1, A1, f0 fxcpmadd f1, B1, A2, f1 .align 4.L108: fpadd f0, f0, f2 fpadd f1, f1, f3#if defined(LN) || defined(RT)#ifdef LN subi r0, KK, 4#else subi r0, KK, 1#endif slwi TEMP, r0, 2 + BASE_SHIFT slwi r0, r0, 0 + BASE_SHIFT add AO, AORIG, TEMP add BO, B, r0 addi BO, BO, - 2 * SIZE#endif#if defined(LN) || defined(LT) LFPDUX f16, BO, INC2 LFPDUX f17, BO, INC2 subi BO, BO, 4 * SIZE fpsub f0, f16, f0 fpsub f1, f17, f1#else LFPDUX f16, AO, INC2 LFPDUX f17, AO, INC2 subi AO, AO, 4 * SIZE fpsub f0, f16, f0 fpsub f1, f17, f1#endif#ifdef LN fsmtp f4, f0 fsmtp f5, f1 LFD A1, (2 + 15) * SIZE(AO) LFD A2, (2 + 14) * SIZE(AO) LFD A3, (2 + 13) * SIZE(AO) LFD A4, (2 + 12) * SIZE(AO) fmul f5, A1, f5 fnmsub f1, A2, f5, f1 fnmsub f4, A3, f5, f4 fnmsub f0, A4, f5, f0 LFD A1, (2 + 10) * SIZE(AO) LFD A2, (2 + 9) * SIZE(AO) LFD A3, (2 + 8) * SIZE(AO) fmul f1, A1, f1 fnmsub f4, A2, f1, f4 fnmsub f0, A3, f1, f0 LFD A1, (2 + 5) * SIZE(AO) LFD A2, (2 + 4) * SIZE(AO) fmul f4, A1, f4 fnmsub f0, A2, f4, f0 LFD A1, (2 + 0) * SIZE(AO) fmul f0, A1, f0 fsmfp f0, f4 fsmfp f1, f5#endif#ifdef LT fsmtp f4, f0 fsmtp f5, f1 LFD A1, (2 + 0) * SIZE(AO) LFD A2, (2 + 1) * SIZE(AO) LFD A3, (2 + 2) * SIZE(AO) LFD A4, (2 + 3) * SIZE(AO) fmul f0, A1, f0 fnmsub f4, A2, f0, f4 fnmsub f1, A3, f0, f1 fnmsub f5, A4, f0, f5 LFD A1, (2 + 5) * SIZE(AO) LFD A2, (2 + 6) * SIZE(AO) LFD A3, (2 + 7) * SIZE(AO) fmul f4, A1, f4 fnmsub f1, A2, f4, f1 fnmsub f5, A3, f4, f5 LFD A1, (2 + 10) * SIZE(AO) LFD A2, (2 + 11) * SIZE(AO) fmul f1, A1, f1 fnmsub f5, A2, f1, f5 LFD A1, (2 + 15) * SIZE(AO) fmul f5, A1, f5 fsmfp f0, f4 fsmfp f1, f5#endif#ifdef RN LFPDX A1, BO, INC2 fxpmul f0, A1, f0 fxpmul f1, A1, f1#endif#ifdef RT LFPDX A1, BO, INC2 fxpmul f0, A1, f0 fxpmul f1, A1, f1#endif#ifdef LN subi CO1, CO1, 4 * SIZE#endif#if defined(LN) || defined(LT) STFPDUX f0, BO, INC2 STFPDUX f1, BO, INC2 subi BO, BO, 4 * SIZE STFDUX f0, CO1, INC STFSDUX f0, CO1, INC STFDUX f1, CO1, INC STFSDUX f1, CO1, INC#else STFPDUX f0, AO, INC2 STFPDUX f1, AO, INC2 subi AO, AO, 4 * SIZE STFDUX f0, CO1, INC STFSDUX f0, CO1, INC STFDUX f1, CO1, INC STFSDUX f1, CO1, INC#endif#ifdef LN subi CO1, CO1, 4 * SIZE#endif#ifdef RT slwi r0, K, 2 + BASE_SHIFT add AORIG, AORIG, r0#endif#if defined(LT) || defined(RN) sub TEMP, K, KK slwi r0, TEMP, 2 + BASE_SHIFT slwi TEMP, TEMP, 0 + BASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LT addi KK, KK, 4#endif#ifdef LN subi KK, KK, 4#endif li r0, FZERO lfpsx f0, SP, r0 .align 4.L110: andi. I, M, 2 beq .L120#if defined(LT) || defined(RN) addi BO, B, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, KK, 3 mtspr CTR, r0 ble .L114#else#ifdef LN slwi r0, K, 1 + BASE_SHIFT sub AORIG, AORIG, r0#endif slwi r0 , KK, 1 + BASE_SHIFT slwi TEMP, KK, 0 + BASE_SHIFT add AO, AORIG, r0 add BO, B, TEMP sub TEMP, K, KK addi BO, BO, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, TEMP, 3 mtspr CTR, r0 ble .L114#endif LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX B1, BO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A5, AO, INC2 LFPDUX A6, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX A7, AO, INC2 LFPDUX A8, AO, INC2 LFPDUX B4, BO, INC2 bdz- .L113 .align 4.L112: fxcpmadd f0, B1, A1, f0 LFPDUX A1, AO, INC2 fxcsmadd f1, B1, A2, f1 LFPDUX A2, AO, INC2 LFPDUX B1, BO, INC2 fxcpmadd f2, B2, A3, f2 LFPDUX A3, AO, INC2 fxcsmadd f3, B2, A4, f3 LFPDUX A4, AO, INC2 LFPDUX B2, BO, INC2 fxcpmadd f0, B3, A5, f0 LFPDUX A5, AO, INC2 fxcsmadd f1, B3, A6, f1 LFPDUX A6, AO, INC2 LFPDUX B3, BO, INC2 fxcpmadd f2, B4, A7, f2 LFPDUX A7, AO, INC2 fxcsmadd f3, B4, A8, f3 LFPDUX A8, AO, INC2 LFPDUX B4, BO, INC2 bdnz+ .L112 .align 4.L113: fxcpmadd f0, B1, A1, f0 fxcsmadd f1, B1, A2, f1 fxcpmadd f2, B2, A3, f2 fxcsmadd f3, B2, A4, f3 fxcpmadd f0, B3, A5, f0 fxcsmadd f1, B3, A6, f1 fxcpmadd f2, B4, A7, f2 fxcsmadd f3, B4, A8, f3 .align 4.L114:#if defined(LT) || defined(RN) andi. r0, KK, 7 mtspr CTR, r0 ble+ .L118#else andi. r0, TEMP, 7 mtspr CTR, r0 ble+ .L118#endif LFPDUX A1, AO, INC2 LFDX B1, BO, INC2 add BO, BO, INC bdz- .L117 .align 4.L116: fxcpmadd f0, B1, A1, f0 LFPDUX A1, AO, INC2 LFDX B1, BO, INC2 add BO, BO, INC bdnz+ .L116 .align 4.L117: fxcpmadd f0, B1, A1, f0 .align 4.L118: fpadd f0, f0, f1 fpadd f2, f3, f2 fpadd f0, f0, f2#if defined(LN) || defined(RT)#ifdef LN subi r0, KK, 2#else subi r0, KK, 1#endif slwi TEMP, r0, 1 + BASE_SHIFT slwi r0, r0, 0 + BASE_SHIFT add AO, AORIG, TEMP add BO, B, r0 addi BO, BO, - 2 * SIZE#endif#if defined(LN) || defined(LT) LFPDX f16, BO, INC2 fpsub f0, f16, f0#else LFPDX f16, AO, INC2 fpsub f0, f16, f0#endif#ifdef LN fsmtp f4, f0 LFD A1, (2 + 3) * SIZE(AO) LFD A2, (2 + 2) * SIZE(AO) LFD A3, (2 + 0) * SIZE(AO) fmul f4, A1, f4 fnmsub f0, A2, f4, f0 fmul f0, A3, f0 fsmfp f0, f4#endif#ifdef LT fsmtp f4, f0 LFD A1, (2 + 0) * SIZE(AO) LFD A2, (2 + 1) * SIZE(AO) LFD A3, (2 + 3) * SIZE(AO) fmul f0, A1, f0 fnmsub f4, A2, f0, f4 fmul f4, A3, f4 fsmfp f0, f4#endif#ifdef RN LFPDX A1, BO, INC2 fxpmul f0, A1, f0#endif#ifdef RT LFPDX A1, BO, INC2 fxpmul f0, A1, f0#endif#ifdef LN subi CO1, CO1, 2 * SIZE#endif#if defined(LN) || defined(LT) STFPDX f0, BO, INC2 STFDUX f0, CO1, INC STFSDUX f0, CO1, INC#else STFPDX f0, AO, INC2 STFDUX f0, CO1, INC STFSDUX f0, CO1, INC#endif#ifdef LN subi CO1, CO1, 2 * SIZE#endif#ifdef RT slwi r0, K, 1 + BASE_SHIFT add AORIG, AORIG, r0#endif#if defined(LT) || defined(RN) sub TEMP, K, KK slwi r0, TEMP, 1 + BASE_SHIFT slwi TEMP, TEMP, 0 + BASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LT addi KK, KK, 2#endif#ifdef LN subi KK, KK, 2#endif li r0, FZERO lfpsx f0, SP, r0 .align 4.L120: andi. I, M, 1 beq .L129#if defined(LT) || defined(RN) addi BO, B, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, KK, 3 mtspr CTR, r0 ble .L124#else#ifdef LN slwi r0, K, 0 + BASE_SHIFT sub AORIG, AORIG, r0#endif slwi r0 , KK, 0 + BASE_SHIFT slwi TEMP, KK, 0 + BASE_SHIFT add AO, AORIG, r0 add BO, B, TEMP sub TEMP, K, KK addi BO, BO, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, TEMP, 3 mtspr CTR, r0 ble .L124#endif LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX A4, AO, INC2 LFPDUX B4, BO, INC2 bdz- .L123 .align 4.L122: fpmadd f0, A1, B1, f0 LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 fpmadd f1, A2, B2, f1 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 fpmadd f2, A3, B3, f2 LFPDUX A3, AO, INC2 LFPDUX B3, BO, INC2 fpmadd f3, A4, B4, f3 LFPDUX A4, AO, INC2 LFPDUX B4, BO, INC2 bdnz+ .L122 .align 4.L123: fpmadd f0, A1, B1, f0 fpmadd f1, A2, B2, f1 fpmadd f2, A3, B3, f2 fpmadd f3, A4, B4, f3 .align 4.L124:#if defined(LT) || defined(RN) andi. r0, KK, 7 mtspr CTR, r0 ble+ .L128#else andi. r0, TEMP, 7 mtspr CTR, r0 ble+ .L128#endif LFDX A1, AO, INC2 LFDX B1, BO, INC2 add AO, AO, INC add BO, BO, INC bdz- .L127 .align 4.L126: fmadd f0, A1, B1, f0 LFDX A1, AO, INC2 LFDX B1, BO, INC2 add AO, AO, INC add BO, BO, INC bdnz+ .L126 .align 4.L127: fmadd f0, A1, B1, f0 .align 4.L128: fpadd f0, f0, f1 fpadd f2, f2, f3 fpadd f0, f0, f2 fsmtp f1, f0 fadd f0, f0, f1#if defined(LN) || defined(RT)#ifdef LN subi r0, KK, 1#else subi r0, KK, 1#endif slwi TEMP, r0, 0 + BASE_SHIFT slwi r0, r0, 0 + BASE_SHIFT add AO, AORIG, TEMP add BO, B, r0 addi BO, BO, - 2 * SIZE#endif#if defined(LN) || defined(LT) LFDX f16, BO, INC2 fsub f0, f16, f0#else LFDX f16, AO, INC2 fsub f0, f16, f0#endif#ifdef LN LFD A1, (2 + 0) * SIZE(AO) fmul f0, A1, f0#endif#ifdef LT LFD A1, (2 + 0) * SIZE(AO) fmul f0, A1, f0#endif#ifdef RN LFDX A1, BO, INC2 fmul f0, A1, f0#endif#ifdef RT LFDX A1, BO, INC2 fmul f0, A1, f0#endif#ifdef LN subi CO1, CO1, 1 * SIZE#endif#if defined(LN) || defined(LT) STFDX f0, BO, INC2 STFDUX f0, CO1, INC#else STFDX f0, AO, INC2 STFDUX f0, CO1, INC#endif#ifdef LN subi CO1, CO1, 1 * SIZE#endif#ifdef RT slwi r0, K, 0 + BASE_SHIFT add AORIG, AORIG, r0#endif#if defined(LT) || defined(RN) sub TEMP, K, KK slwi r0, TEMP, 0 + BASE_SHIFT slwi TEMP, TEMP, 0 + BASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LT addi KK, KK, 1#endif#ifdef LN subi KK, KK, 1#endif .align 4.L129:#ifdef LN slwi r0, K, 0 + BASE_SHIFT add B, B, r0#endif#if defined(LT) || defined(RN) addi B, BO, 2 * SIZE#endif#ifdef RN addi KK, KK, 1#endif#ifdef RT subi KK, KK, 1#endif .align 4.L50: andi. J, N, 2 beq .L90#ifdef RT slwi r0, K, 1 + BASE_SHIFT sub B, B, r0 slwi r0, LDC, 1 sub C, C, r0#endif mr CO1, C add CO2, C, LDC#ifdef LN add KK, M, OFFSET#endif#ifdef LT mr KK, OFFSET#endif#if defined(LN) || defined(RT) addi AORIG, A, -2 * SIZE#else addi AO, A, -2 * SIZE#endif#ifndef RT add C, CO2, LDC#endif li r0, FZERO lfpsx f0, SP, r0 srawi. I, M, 3 ble .L60 .align 4.L51:#if defined(LT) || defined(RN) fpmr f4, f0 addi BO, B, - 2 * SIZE fpmr f1, f0 fpmr f5, f0 fpmr f2, f0 fpmr f6, f0 srawi. r0, KK, 2 fpmr f3, f0 mtspr CTR, r0 fpmr f7, f0 ble .L54#else#ifdef LN slwi r0, K, 3 + BASE_SHIFT sub AORIG, AORIG, r0#endif slwi r0 , KK, 3 + BASE_SHIFT slwi TEMP, KK, 1 + BASE_SHIFT add AO, AORIG, r0 add BO, B, TEMP sub TEMP, K, KK fpmr f4, f0 addi BO, BO, - 2 * SIZE fpmr f1, f0 fpmr f5, f0 fpmr f2, f0 fpmr f6, f0 srawi. r0, TEMP, 2 fpmr f3, f0 mtspr CTR, r0 fpmr f7, f0 ble .L54#endif LFPDUX B1, BO, INC2 LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX A5, AO, INC2 LFPDUX A6, AO, INC2 LFPDUX A7, AO, INC2 LFPDUX A8, AO, INC2
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -