📄 ztrsm_kernel_hummer_rt.s
字号:
fpmr f1, f0 addi BO, BO, - 2 * SIZE fpmr f2, f0 fpmr f3, f0 srawi. r0, TEMP, 2 mtspr CTR, r0 ble .L64#endif LFPDUX B1, BO, INC2 LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX A5, AO, INC2 LFPDUX A6, AO, INC2 LFPDUX B4, BO, INC2 LFPDUX A7, AO, INC2 LFPDUX A8, AO, INC2 bdz- .L63 .align 4.L62: FXCPMADD f0, B1, A1, f0 FXCSMADD f2, B1, A1, f2 LFPDUX A1, AO, INC2 FXCPMADD f1, B1, A2, f1 FXCSMADD f3, B1, A2, f3 LFPDUX A2, AO, INC2 LFPDUX B1, BO, INC2 FXCPMADD f0, B2, A3, f0 FXCSMADD f2, B2, A3, f2 LFPDUX A3, AO, INC2 FXCPMADD f1, B2, A4, f1 FXCSMADD f3, B2, A4, f3 LFPDUX A4, AO, INC2 LFPDUX B2, BO, INC2 FXCPMADD f0, B3, A5, f0 FXCSMADD f2, B3, A5, f2 LFPDUX A5, AO, INC2 FXCPMADD f1, B3, A6, f1 FXCSMADD f3, B3, A6, f3 LFPDUX A6, AO, INC2 LFPDUX B3, BO, INC2 FXCPMADD f0, B4, A7, f0 FXCSMADD f2, B4, A7, f2 LFPDUX A7, AO, INC2 FXCPMADD f1, B4, A8, f1 FXCSMADD f3, B4, A8, f3 LFPDUX A8, AO, INC2 LFPDUX B4, BO, INC2 bdnz+ .L62 .align 4.L63: FXCPMADD f0, B1, A1, f0 FXCSMADD f2, B1, A1, f2 FXCPMADD f1, B1, A2, f1 FXCSMADD f3, B1, A2, f3 FXCPMADD f0, B2, A3, f0 FXCSMADD f2, B2, A3, f2 FXCPMADD f1, B2, A4, f1 FXCSMADD f3, B2, A4, f3 FXCPMADD f0, B3, A5, f0 FXCSMADD f2, B3, A5, f2 FXCPMADD f1, B3, A6, f1 FXCSMADD f3, B3, A6, f3 FXCPMADD f0, B4, A7, f0 FXCSMADD f2, B4, A7, f2 FXCPMADD f1, B4, A8, f1 FXCSMADD f3, B4, A8, f3 .align 4.L64:#if defined(LT) || defined(RN) andi. r0, KK, 3 mtspr CTR, r0 ble+ .L68#else andi. r0, TEMP, 3 mtspr CTR, r0 ble+ .L68#endif LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 LFPDUX A2, AO, INC2 bdz- .L67 .align 4.L66: FXCPMADD f0, B1, A1, f0 FXCSMADD f2, B1, A1, f2 LFPDUX A1, AO, INC2 FXCPMADD f1, B1, A2, f1 FXCSMADD f3, B1, A2, f3 LFPDUX B1, BO, INC2 LFPDUX A2, AO, INC2 bdnz+ .L66 .align 4.L67: FXCPMADD f0, B1, A1, f0 FXCSMADD f2, B1, A1, f2 FXCPMADD f1, B1, A2, f1 FXCSMADD f3, B1, A2, f3 .align 4.L68: fpadd f0, f0, f2 fpadd f1, f1, f3#if defined(LN) || defined(RT)#ifdef LN subi r0, KK, 2#else subi r0, KK, 1#endif slwi TEMP, r0, 1 + ZBASE_SHIFT slwi r0, r0, 0 + ZBASE_SHIFT add AO, AORIG, TEMP add BO, B, r0 addi BO, BO, - 2 * SIZE#endif#if defined(LN) || defined(LT) LFPDUX f16, BO, INC2 LFPDUX f17, BO, INC2 subi BO, BO, 4 * SIZE#else LFPDUX f16, AO, INC2 LFPDUX f17, AO, INC2 subi AO, AO, 4 * SIZE#endif fpsub f0, f16, f0 fpsub f1, f17, f1#ifdef LN LFPDUX A1, AO, INC2 add AO, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX A3, AO, INC2 subi AO, AO, 8 * SIZE fxpmul f4, A3, f1 FXCXNPMA f1, A3, f1, f4 fxcpnmsub f0, A2, f1, f0 FXCXNSMA f0, A2, f1, f0 fxpmul f4, A1, f0 FXCXNPMA f0, A1, f0, f4#endif#ifdef LT LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 add AO, AO, INC2 LFPDUX A3, AO, INC2 subi AO, AO, 8 * SIZE fxpmul f4, A1, f0 FXCXNPMA f0, A1, f0, f4 fxcpnmsub f1, A2, f0, f1 FXCXNSMA f1, A2, f0, f1 fxpmul f6, A3, f1 FXCXNPMA f1, A3, f1, f6#endif#ifdef RN LFPDX A1, BO, INC2 fxpmul f4, A1, f0 fxpmul f5, A1, f1 FXCXNPMA f0, A1, f0, f4 FXCXNPMA f1, A1, f1, f5#endif#ifdef RT LFPDX A1, BO, INC2 fxpmul f4, A1, f0 fxpmul f5, A1, f1 FXCXNPMA f0, A1, f0, f4 FXCXNPMA f1, A1, f1, f5#endif#ifdef LN subi CO1, CO1, 4 * SIZE#endif#if defined(LN) || defined(LT) STFPDUX f0, BO, INC2 STFPDUX f1, BO, INC2 subi BO, BO, 4 * SIZE#else STFPDUX f0, AO, INC2 STFPDUX f1, AO, INC2 subi AO, AO, 4 * SIZE#endif STFDUX f0, CO1, INC STFSDUX f0, CO1, INC STFDUX f1, CO1, INC STFSDUX f1, CO1, INC#ifdef LN subi CO1, CO1, 4 * SIZE#endif#ifdef RT slwi r0, K, 1 + ZBASE_SHIFT add AORIG, AORIG, r0#endif#if defined(LT) || defined(RN) sub TEMP, K, KK slwi r0, TEMP, 1 + ZBASE_SHIFT slwi TEMP, TEMP, 0 + ZBASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LT addi KK, KK, 2#endif#ifdef LN subi KK, KK, 2#endif li r0, FZERO lfpsx f0, SP, r0 .align 4.L70: andi. I, M, 1 beq .L89#if defined(LT) || defined(RN) addi BO, B, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, KK, 3 mtspr CTR, r0 ble .L74#else#ifdef LN slwi r0, K, 0 + ZBASE_SHIFT sub AORIG, AORIG, r0#endif slwi TEMP, KK, 0 + ZBASE_SHIFT add AO, AORIG, TEMP add BO, B, TEMP sub TEMP, K, KK addi BO, BO, - 2 * SIZE fpmr f1, f0 fpmr f2, f0 fpmr f3, f0 srawi. r0, TEMP, 3 mtspr CTR, r0 ble .L74#endif LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX A4, AO, INC2 LFPDUX B4, BO, INC2 LFPDUX A5, AO, INC2 LFPDUX B5, BO, INC2 LFPDUX A6, AO, INC2 LFPDUX B6, BO, INC2 LFPDUX A7, AO, INC2 LFPDUX A9, BO, INC2 LFPDUX A8, AO, INC2 LFPDUX A10, BO, INC2 bdz- .L73 .align 4.L72: FXCPMADD f0, B1, A1, f0 FXCSMADD f1, B1, A1, f1 LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 FXCPMADD f2, B2, A2, f2 FXCSMADD f3, B2, A2, f3 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 FXCPMADD f0, B3, A3, f0 FXCSMADD f1, B3, A3, f1 LFPDUX A3, AO, INC2 LFPDUX B3, BO, INC2 FXCPMADD f2, B4, A4, f2 FXCSMADD f3, B4, A4, f3 LFPDUX A4, AO, INC2 LFPDUX B4, BO, INC2 FXCPMADD f0, B5, A5, f0 FXCSMADD f1, B5, A5, f1 LFPDUX A5, AO, INC2 LFPDUX B5, BO, INC2 FXCPMADD f2, B6, A6, f2 FXCSMADD f3, B6, A6, f3 LFPDUX A6, AO, INC2 LFPDUX B6, BO, INC2 FXCPMADD f0, A9, A7, f0 FXCSMADD f1, A9, A7, f1 LFPDUX A7, AO, INC2 LFPDUX A9, BO, INC2 FXCPMADD f2, A10, A8, f2 FXCSMADD f3, A10, A8, f3 LFPDUX A8, AO, INC2 LFPDUX A10, BO, INC2 bdnz+ .L72 .align 4.L73: FXCPMADD f0, B1, A1, f0 FXCSMADD f1, B1, A1, f1 FXCPMADD f2, B2, A2, f2 FXCSMADD f3, B2, A2, f3 FXCPMADD f0, B3, A3, f0 FXCSMADD f1, B3, A3, f1 FXCPMADD f2, B4, A4, f2 FXCSMADD f3, B4, A4, f3 FXCPMADD f0, B5, A5, f0 FXCSMADD f1, B5, A5, f1 FXCPMADD f2, B6, A6, f2 FXCSMADD f3, B6, A6, f3 FXCPMADD f0, A9, A7, f0 FXCSMADD f1, A9, A7, f1 FXCPMADD f2, A10, A8, f2 FXCSMADD f3, A10, A8, f3 .align 4.L74:#if defined(LT) || defined(RN) andi. r0, KK, 7 mtspr CTR, r0 ble+ .L78#else andi. r0, TEMP, 7 mtspr CTR, r0 ble+ .L78#endif LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 bdz- .L77 .align 4.L76: FXCPMADD f0, B1, A1, f0 FXCSMADD f1, B1, A1, f1 LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 bdnz+ .L76 .align 4.L77: FXCPMADD f0, B1, A1, f0 FXCSMADD f1, B1, A1, f1 .align 4.L78: fpadd f0, f0, f2 fpadd f1, f1, f3 fpadd f0, f0, f1#if defined(LN) || defined(RT)#ifdef LN subi r0, KK, 1#else subi r0, KK, 1#endif slwi TEMP, r0, 0 + ZBASE_SHIFT add AO, AORIG, TEMP add BO, B, TEMP addi BO, BO, - 2 * SIZE#endif#if defined(LN) || defined(LT) LFPDX f16, BO, INC2#else LFPDX f16, AO, INC2#endif fpsub f0, f16, f0#ifdef LN LFPDX A1, AO, INC2 fxpmul f4, A1, f0 FXCXNPMA f0, A1, f0, f4#endif#ifdef LT LFPDX A1, AO, INC2 fxpmul f4, A1, f0 FXCXNPMA f0, A1, f0, f4#endif#ifdef RN LFPDX A1, BO, INC2 fxpmul f4, A1, f0 FXCXNPMA f0, A1, f0, f4#endif#ifdef RT LFPDX A1, BO, INC2 fxpmul f4, A1, f0 FXCXNPMA f0, A1, f0, f4#endif#ifdef LN subi CO1, CO1, 2 * SIZE#endif#if defined(LN) || defined(LT) STFPDX f0, BO, INC2#else STFPDX f0, AO, INC2#endif STFDUX f0, CO1, INC STFSDUX f0, CO1, INC#ifdef LN subi CO1, CO1, 2 * SIZE#endif#ifdef RT slwi r0, K, 0 + ZBASE_SHIFT add AORIG, AORIG, r0#endif#if defined(LT) || defined(RN) sub TEMP, K, KK slwi TEMP, TEMP, 0 + ZBASE_SHIFT add AO, AO, TEMP add BO, BO, TEMP#endif#ifdef LT addi KK, KK, 1#endif#ifdef LN subi KK, KK, 1#endif li r0, FZERO lfpsx f0, SP, r0 .align 4.L89:#ifdef LN slwi r0, K, 0 + ZBASE_SHIFT add B, B, r0#endif#if defined(LT) || defined(RN) addi B, BO, 2 * SIZE#endif#ifdef RN addi KK, KK, 1#endif#ifdef RT subi KK, KK, 1#endif .align 4.L50: srawi. J, N, 1 ble .L999 .align 4.L10:#ifdef RT slwi r0, K, 1 + ZBASE_SHIFT sub B, B, r0 slwi r0, LDC, 1 sub C, C, r0#endif mr CO1, C add CO2, C, LDC#ifdef LN add KK, M, OFFSET#endif#ifdef LT mr KK, OFFSET#endif#if defined(LN) || defined(RT) addi AORIG, A, -4 * SIZE#else addi AO, A, -4 * SIZE#endif#ifndef RT add C, CO2, LDC#endif li r0, FZERO lfpsx f0, SP, r0 srawi. I, M, 2 ble .L20 .align 4.L11:#if defined(LT) || defined(RN) addi AO2, AO, 2 * SIZE fpmr f4, f0 addi BO, B, - 4 * SIZE fpmr f8, f0 addi BO2, B, - 2 * SIZE fpmr f12, f0 fpmr f5, f0 fpmr f9, f0 fpmr f13, f0 fpmr f2, f0 fpmr f6, f0 fpmr f10, f0 fpmr f14, f0 fpmr f3, f0 fpmr f7, f0 fpmr f11, f0 fpmr f15, f0 srawi. r0, KK, 2 fpmr f1, f0 mtspr CTR, r0 ble .L14#else#ifdef LN slwi r0, K, 2 + ZBASE_SHIFT sub AORIG, AORIG, r0#endif slwi r0 , KK, 2 + ZBASE_SHIFT slwi TEMP, KK, 1 + ZBASE_SHIFT add AO, AORIG, r0 add BO, B, TEMP sub TEMP, K, KK fpmr f5, f0 fpmr f9, f0 fpmr f13, f0 fpmr f2, f0 fpmr f6, f0 fpmr f10, f0 fpmr f14, f0 fpmr f3, f0 fpmr f7, f0 fpmr f11, f0 fpmr f15, f0 addi AO2, AO, 2 * SIZE fpmr f4, f0 addi BO, BO, - 4 * SIZE fpmr f8, f0 addi BO2, BO, 2 * SIZE fpmr f12, f0 srawi. r0, TEMP, 2 fpmr f1, f0 mtspr CTR, r0 ble .L14#endif LFPDUX A1, AO, INC4 fpmr f5, f0 LFPDUX A3, AO, INC4 fpmr f9, f0 LFPDUX B1, BO, INC4 fpmr f13, f0 LFPDUX A5, AO, INC4 fpmr f2, f0 LFPDUX A6, AO, INC4 fpmr f6, f0 LFPDUX B3, BO, INC4 fpmr f10, f0 LFPDUX A7, AO, INC4 fpmr f14, f0 LFPDUX A8, AO, INC4 fpmr f3, f0 LFPDUX B5, BO, INC4 fpmr f7, f0 LFPDUX A9, AO, INC4 fpmr f11, f0 LFPDUX A2, AO2, INC4 fpmr f15, f0 LFPDUX B2, BO2, INC4 bdz- .L13 .align 4.L12:## 1 ## FXCPMADD f0, B1, A1, f0 nop FXCSMADD f4, B1, A1, f4 nop FXCPMADD f8, B2, A1, f8 LFPDUX B4, BO2, INC4 FXCSMADD f12, B2, A1, f12 LFPDUX B6, BO, INC4 FXCPMADD f1, B1, A2, f1 nop FXCSMADD f5, B1, A2, f5 LFPDUX A4, AO2, INC4 FXCPMADD f9, B2, A2, f9 LFPDUX A10, AO, INC4 FXCSMADD f13, B2, A2, f13 nop FXCPMADD f2, B1, A3, f2 nop FXCSMADD f6, B1, A3, f6 nop FXCPMADD f10, B2, A3, f10 nop FXCSMADD f14, B2, A3, f14 nop FXCPMADD f3, B1, A4, f3 nop FXCSMADD f7, B1, A4, f7 LFPDUX A2, AO2, INC4 FXCPMADD f11, B2, A4, f11 LFPDUX A1, AO, INC4 FXCSMADD f15, B2, A4, f15 nop## 2 ## FXCPMADD f0, B3, A5, f0 nop FXCSMADD f4, B3, A5, f4 nop FXCPMADD f8, B4, A5, f8 LFPDUX B2, BO2, INC4 FXCSMADD f12, B4, A5, f12 LFPDUX B1, BO, INC4 FXCPMADD f1, B3, A2, f1 nop FXCSMADD f5, B3, A2, f5 LFPDUX A4, AO2, INC4 FXCPMADD f9, B4, A2, f9 LFPDUX A3, AO, INC4 FXCSMADD f13, B4, A2, f13 nop FXCPMADD f2, B3, A6, f2 nop FXCSMADD f6, B3, A6, f6 nop FXCPMADD f10, B4, A6, f10 nop FXCSMADD f14, B4, A6, f14 nop FXCPMADD f3, B3, A4, f3 nop FXCSMADD f7, B3, A4, f7 LFPDUX A2, AO2, INC4 FXCPMADD f11, B4, A4, f11 LFPDUX A5, AO, INC4 FXCSMADD f15, B4, A4, f15 nop## 3 ## FXCPMADD f0, B5, A7, f0 nop FXCSMADD f4, B5, A7, f4 nop FXCPMADD f8, B2, A7, f8 LFPDUX B4, BO2, INC4
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -