📄 trsm_kernel_hummer_rt.s
字号:
bdz- .L53 .align 4.L52: fxcpmadd f0, B1, A1, f0 LFPDUX B4, BO, INC2 fxcsmadd f4, B1, A1, f4 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 nop fxcsmadd f5, B1, A2, f5 LFPDUX A2, AO, INC2 fxcpmadd f2, B1, A3, f2 nop fxcsmadd f6, B1, A3, f6 LFPDUX A3, AO, INC2 fxcpmadd f3, B1, A4, f3 nop fxcsmadd f7, B1, A4, f7 LFPDUX A4, AO, INC2 fxcpmadd f0, B2, A5, f0 LFPDUX B1, BO, INC2 fxcsmadd f4, B2, A5, f4 LFPDUX A5, AO, INC2 fxcpmadd f1, B2, A6, f1 nop fxcsmadd f5, B2, A6, f5 LFPDUX A6, AO, INC2 fxcpmadd f2, B2, A7, f2 nop fxcsmadd f6, B2, A7, f6 LFPDUX A7, AO, INC2 fxcpmadd f3, B2, A8, f3 nop fxcsmadd f7, B2, A8, f7 LFPDUX A8, AO, INC2 fxcpmadd f0, B3, A1, f0 LFPDUX B2, BO, INC2 fxcsmadd f4, B3, A1, f4 LFPDUX A1, AO, INC2 fxcpmadd f1, B3, A2, f1 nop fxcsmadd f5, B3, A2, f5 LFPDUX A2, AO, INC2 fxcpmadd f2, B3, A3, f2 nop fxcsmadd f6, B3, A3, f6 LFPDUX A3, AO, INC2 fxcpmadd f3, B3, A4, f3 nop fxcsmadd f7, B3, A4, f7 LFPDUX A4, AO, INC2 fxcpmadd f0, B4, A5, f0 LFPDUX B3, BO, INC2 fxcsmadd f4, B4, A5, f4 LFPDUX A5, AO, INC2 fxcpmadd f1, B4, A6, f1 nop fxcsmadd f5, B4, A6, f5 LFPDUX A6, AO, INC2 fxcpmadd f2, B4, A7, f2 nop fxcsmadd f6, B4, A7, f6 LFPDUX A7, AO, INC2 fxcpmadd f3, B4, A8, f3 nop fxcsmadd f7, B4, A8, f7 LFPDUX A8, AO, INC2 bdnz+ .L52 .align 4.L53: fxcpmadd f0, B1, A1, f0 LFPDUX B4, BO, INC2 fxcsmadd f4, B1, A1, f4 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 nop fxcsmadd f5, B1, A2, f5 LFPDUX A2, AO, INC2 fxcpmadd f2, B1, A3, f2 nop fxcsmadd f6, B1, A3, f6 LFPDUX A3, AO, INC2 fxcpmadd f3, B1, A4, f3 nop fxcsmadd f7, B1, A4, f7 LFPDUX A4, AO, INC2 fxcpmadd f0, B2, A5, f0 nop fxcsmadd f4, B2, A5, f4 LFPDUX A5, AO, INC2 fxcpmadd f1, B2, A6, f1 nop fxcsmadd f5, B2, A6, f5 LFPDUX A6, AO, INC2 fxcpmadd f2, B2, A7, f2 nop fxcsmadd f6, B2, A7, f6 LFPDUX A7, AO, INC2 fxcpmadd f3, B2, A8, f3 nop fxcsmadd f7, B2, A8, f7 LFPDUX A8, AO, INC2 fxcpmadd f0, B3, A1, f0 fxcsmadd f4, B3, A1, f4 fxcpmadd f1, B3, A2, f1 fxcsmadd f5, B3, A2, f5 fxcpmadd f2, B3, A3, f2 fxcsmadd f6, B3, A3, f6 fxcpmadd f3, B3, A4, f3 fxcsmadd f7, B3, A4, f7 fxcpmadd f0, B4, A5, f0 fxcsmadd f4, B4, A5, f4 fxcpmadd f1, B4, A6, f1 fxcsmadd f5, B4, A6, f5 fxcpmadd f2, B4, A7, f2 fxcsmadd f6, B4, A7, f6 fxcpmadd f3, B4, A8, f3 fxcsmadd f7, B4, A8, f7 .align 4.L54:#if defined(LT) || defined(RN) andi. r0, KK, 3 mtspr CTR, r0 ble+ .L58#else andi. r0, TEMP, 3 mtspr CTR, r0 ble+ .L58#endif LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 LFPDUX A2, AO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 bdz- .L57 .align 4.L56: fxcpmadd f0, B1, A1, f0 fxcsmadd f4, B1, A1, f4 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 fxcsmadd f5, B1, A2, f5 LFPDUX A2, AO, INC2 fxcpmadd f2, B1, A3, f2 fxcsmadd f6, B1, A3, f6 LFPDUX A3, AO, INC2 fxcpmadd f3, B1, A4, f3 fxcsmadd f7, B1, A4, f7 LFPDUX A4, AO, INC2 LFPDUX B1, BO, INC2 bdnz+ .L56 .align 4.L57: fxcpmadd f0, B1, A1, f0 fxcsmadd f4, B1, A1, f4 fxcpmadd f1, B1, A2, f1 fxcsmadd f5, B1, A2, f5 fxcpmadd f2, B1, A3, f2 fxcsmadd f6, B1, A3, f6 fxcpmadd f3, B1, A4, f3 fxcsmadd f7, B1, A4, f7 .align 4.L58:#if defined(LN) || defined(RT)#ifdef LN subi r0, KK, 8#else subi r0, KK, 2#endif slwi TEMP, r0, 3 + BASE_SHIFT slwi r0, r0, 1 + BASE_SHIFT add AO, AORIG, TEMP add BO, B, r0 addi BO, BO, - 2 * SIZE#endif#if defined(LN) || defined(LT) fpmr f24, f0 fpmr f25, f1 fpmr f26, f2 fpmr f27, f3 fsmfp f0, f4 fsmfp f1, f5 fsmfp f2, f6 fsmfp f3, f7 fsmtp f4, f24 fsmtp f5, f25 fsmtp f6, f26 fsmtp f7, f27 LFPDUX f16, BO, INC2 LFPDUX f17, BO, INC2 LFPDUX f18, BO, INC2 LFPDUX f19, BO, INC2 LFPDUX f20, BO, INC2 LFPDUX f21, BO, INC2 LFPDUX f22, BO, INC2 LFPDUX f23, BO, INC2 subi BO, BO, 16 * SIZE fpsub f0, f16, f0 fpsub f4, f17, f4 fpsub f1, f18, f1 fpsub f5, f19, f5 fpsub f2, f20, f2 fpsub f6, f21, f6 fpsub f3, f22, f3 fpsub f7, f23, f7#else LFPDUX f16, AO, INC2 LFPDUX f17, AO, INC2 LFPDUX f18, AO, INC2 LFPDUX f19, AO, INC2 LFPDUX f20, AO, INC2 LFPDUX f21, AO, INC2 LFPDUX f22, AO, INC2 LFPDUX f23, AO, INC2 subi AO, AO, 16 * SIZE fpsub f0, f16, f0 fpsub f1, f17, f1 fpsub f2, f18, f2 fpsub f3, f19, f3 fpsub f4, f20, f4 fpsub f5, f21, f5 fpsub f6, f22, f6 fpsub f7, f23, f7#endif#ifdef LN addi AO, AO, 66 * SIZE LFPDUX A1, AO, INCM2 LFPDUX A2, AO, INCM2 LFPDUX A3, AO, INCM2 LFPDUX A4, AO, INCM2 LFPDUX A5, AO, INCM2 LFPDUX A6, AO, INCM2 LFPDUX A7, AO, INCM2 LFPDUX A8, AO, INCM2 fxsmul f7, A1, f7 fxcpnmsub f3, A1, f7, f3 fxcsnmsub f6, A2, f7, f6 fxcpnmsub f2, A2, f7, f2 fxcsnmsub f5, A3, f7, f5 fxcpnmsub f1, A3, f7, f1 fxcsnmsub f4, A4, f7, f4 fxcpnmsub f0, A4, f7, f0 fxpmul f3, A5, f3 fxcsnmsub f6, A6, f3, f6 fxcpnmsub f2, A6, f3, f2 fxcsnmsub f5, A7, f3, f5 fxcpnmsub f1, A7, f3, f1 fxcsnmsub f4, A8, f3, f4 fxcpnmsub f0, A8, f3, f0 add AO, AO, INCM2 LFPDUX A1, AO, INCM2 LFPDUX A2, AO, INCM2 LFPDUX A3, AO, INCM2 add AO, AO, INCM2 LFPDUX A4, AO, INCM2 LFPDUX A5, AO, INCM2 LFPDUX A6, AO, INCM2 add AO, AO, INCM2 add AO, AO, INCM2 LFPDUX A7, AO, INCM2 LFPDUX A8, AO, INCM2 fxsmul f6, A1, f6 fxcpnmsub f2, A1, f6, f2 fxcsnmsub f5, A2, f6, f5 fxcpnmsub f1, A2, f6, f1 fxcsnmsub f4, A3, f6, f4 fxcpnmsub f0, A3, f6, f0 fxpmul f2, A4, f2 fxcsnmsub f5, A5, f2, f5 fxcpnmsub f1, A5, f2, f1 fxcsnmsub f4, A6, f2, f4 fxcpnmsub f0, A6, f2, f0 fxsmul f5, A7, f5 fxcpnmsub f1, A7, f5, f1 fxcsnmsub f4, A8, f5, f4 fxcpnmsub f0, A8, f5, f0 add AO, AO, INCM2 add AO, AO, INCM2 LFPDUX A1, AO, INCM2 LFPDUX A2, AO, INCM2 subi AO, AO, 6 * SIZE LFPDUX A3, AO, INCM2 subi AO, AO, 6 * SIZE LFPDUX A4, AO, INCM2 addi AO, AO, -2 * SIZE fxpmul f1, A1, f1 fxcsnmsub f4, A2, f1, f4 fxcpnmsub f0, A2, f1, f0 fxsmul f4, A3, f4 fxcpnmsub f0, A3, f4, f0 fxpmul f0, A4, f0#endif#ifdef LT LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 LFPDUX A5, AO, INC2 LFPDUX A6, AO, INC2 LFPDUX A7, AO, INC2 LFPDUX A8, AO, INC2 fxpmul f0, A1, f0 fxcsnmsub f4, A1, f0, f4 fxcpnmsub f1, A2, f0, f1 fxcsnmsub f5, A2, f0, f5 fxcpnmsub f2, A3, f0, f2 fxcsnmsub f6, A3, f0, f6 fxcpnmsub f3, A4, f0, f3 fxcsnmsub f7, A4, f0, f7 fxsmul f4, A5, f4 fxcpnmsub f1, A6, f4, f1 fxcsnmsub f5, A6, f4, f5 fxcpnmsub f2, A7, f4, f2 fxcsnmsub f6, A7, f4, f6 fxcpnmsub f3, A8, f4, f3 fxcsnmsub f7, A8, f4, f7 add AO, AO, INC2 LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX A3, AO, INC2 add AO, AO, INC2 LFPDUX A4, AO, INC2 LFPDUX A5, AO, INC2 LFPDUX A6, AO, INC2 add AO, AO, INC2 add AO, AO, INC2 LFPDUX A7, AO, INC2 LFPDUX A8, AO, INC2 fxpmul f1, A1, f1 fxcsnmsub f5, A1, f1, f5 fxcpnmsub f2, A2, f1, f2 fxcsnmsub f6, A2, f1, f6 fxcpnmsub f3, A3, f1, f3 fxcsnmsub f7, A3, f1, f7 fxsmul f5, A4, f5 fxcpnmsub f2, A5, f5, f2 fxcsnmsub f6, A5, f5, f6 fxcpnmsub f3, A6, f5, f3 fxcsnmsub f7, A6, f5, f7 fxpmul f2, A7, f2 fxcsnmsub f6, A7, f2, f6 fxcpnmsub f3, A8, f2, f3 fxcsnmsub f7, A8, f2, f7 add AO, AO, INC2 add AO, AO, INC2 LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 addi AO, AO, 6 * SIZE LFPDUX A3, AO, INC2 addi AO, AO, 6 * SIZE LFPDUX A4, AO, INC2 subi AO, AO, 64 * SIZE fxsmul f6, A1, f6 fxcpnmsub f3, A2, f6, f3 fxcsnmsub f7, A2, f6, f7 fxpmul f3, A3, f3 fxcsnmsub f7, A3, f3, f7 fxsmul f7, A4, f7#endif#ifdef RN LFPDUX A1, BO, INC2 LFPDUX A2, BO, INC2 subi BO, BO, 4 * SIZE fxpmul f0, A1, f0 fxpmul f1, A1, f1 fxpmul f2, A1, f2 fxpmul f3, A1, f3 fxcsnmsub f4, A1, f0, f4 fxcsnmsub f5, A1, f1, f5 fxcsnmsub f6, A1, f2, f6 fxcsnmsub f7, A1, f3, f7 fxsmul f4, A2, f4 fxsmul f5, A2, f5 fxsmul f6, A2, f6 fxsmul f7, A2, f7#endif#ifdef RT LFPDUX A2, BO, INC2 LFPDUX A1, BO, INC2 subi BO, BO, 4 * SIZE fxsmul f4, A1, f4 fxsmul f5, A1, f5 fxsmul f6, A1, f6 fxsmul f7, A1, f7 fxcpnmsub f0, A1, f4, f0 fxcpnmsub f1, A1, f5, f1 fxcpnmsub f2, A1, f6, f2 fxcpnmsub f3, A1, f7, f3 fxpmul f0, A2, f0 fxpmul f1, A2, f1 fxpmul f2, A2, f2 fxpmul f3, A2, f3#endif#ifdef LN subi CO1, CO1, 8 * SIZE subi CO2, CO2, 8 * SIZE#endif#if defined(LN) || defined(LT) STFPDUX f0, BO, INC2 STFPDUX f4, BO, INC2 STFPDUX f1, BO, INC2 STFPDUX f5, BO, INC2 STFPDUX f2, BO, INC2 STFPDUX f6, BO, INC2 STFPDUX f3, BO, INC2 STFPDUX f7, BO, INC2 subi BO, BO, 16 * SIZE STFDUX f0, CO1, INC STFDUX f4, CO1, INC STFDUX f1, CO1, INC STFDUX f5, CO1, INC STFDUX f2, CO1, INC STFDUX f6, CO1, INC STFDUX f3, CO1, INC STFDUX f7, CO1, INC STFSDUX f0, CO2, INC STFSDUX f4, CO2, INC STFSDUX f1, CO2, INC STFSDUX f5, CO2, INC STFSDUX f2, CO2, INC STFSDUX f6, CO2, INC STFSDUX f3, CO2, INC STFSDUX f7, CO2, INC#else STFPDUX f0, AO, INC2 STFPDUX f1, AO, INC2 STFPDUX f2, AO, INC2 STFPDUX f3, AO, INC2 STFPDUX f4, AO, INC2 STFPDUX f5, AO, INC2 STFPDUX f6, AO, INC2 STFPDUX f7, AO, INC2 subi AO, AO, 16 * SIZE STFDUX f0, CO1, INC STFSDUX f0, CO1, INC STFDUX f1, CO1, INC STFSDUX f1, CO1, INC STFDUX f2, CO1, INC STFSDUX f2, CO1, INC STFDUX f3, CO1, INC STFSDUX f3, CO1, INC STFDUX f4, CO2, INC STFSDUX f4, CO2, INC STFDUX f5, CO2, INC STFSDUX f5, CO2, INC STFDUX f6, CO2, INC STFSDUX f6, CO2, INC STFDUX f7, CO2, INC STFSDUX f7, CO2, INC#endif#ifdef LN subi CO1, CO1, 8 * SIZE subi CO2, CO2, 8 * SIZE#endif#ifdef RT slwi r0, K, 3 + BASE_SHIFT add AORIG, AORIG, r0#endif#if defined(LT) || defined(RN) sub TEMP, K, KK slwi r0, TEMP, 3 + BASE_SHIFT slwi TEMP, TEMP, 1 + BASE_SHIFT add AO, AO, r0 add BO, BO, TEMP#endif#ifdef LT addi KK, KK, 8#endif#ifdef LN subi KK, KK, 8#endif addic. I, I, -1 li r0, FZERO lfpsx f0, SP, r0 bgt+ .L51 .align 4.L60: andi. I, M, 4 beq .L70#if defined(LT) || defined(RN) fpmr f1, f0 addi BO, B, - 2 * SIZE fpmr f2, f0 fpmr f3, f0 srawi. r0, KK, 2 mtspr CTR, r0 ble .L64#else#ifdef LN slwi r0, K, 2 + BASE_SHIFT sub AORIG, AORIG, r0#endif slwi r0 , KK, 2 + BASE_SHIFT slwi TEMP, KK, 1 + BASE_SHIFT add AO, AORIG, r0 add BO, B, TEMP sub TEMP, K, KK fpmr f1, f0 addi BO, BO, - 2 * SIZE fpmr f2, f0 fpmr f3, f0 srawi. r0, TEMP, 2 mtspr CTR, r0 ble .L64#endif LFPDUX B1, BO, INC2 LFPDUX A1, AO, INC2 LFPDUX A2, AO, INC2 LFPDUX B2, BO, INC2 LFPDUX A3, AO, INC2 LFPDUX A4, AO, INC2 LFPDUX B3, BO, INC2 LFPDUX A5, AO, INC2 LFPDUX A6, AO, INC2 LFPDUX B4, BO, INC2 LFPDUX A7, AO, INC2 LFPDUX A8, AO, INC2 bdz- .L63 .align 4.L62: fxcpmadd f0, B1, A1, f0 fxcsmadd f2, B1, A1, f2 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 fxcsmadd f3, B1, A2, f3 LFPDUX A2, AO, INC2 LFPDUX B1, BO, INC2 fxcpmadd f0, B2, A3, f0 fxcsmadd f2, B2, A3, f2 LFPDUX A3, AO, INC2 fxcpmadd f1, B2, A4, f1 fxcsmadd f3, B2, A4, f3 LFPDUX A4, AO, INC2 LFPDUX B2, BO, INC2 fxcpmadd f0, B3, A5, f0 fxcsmadd f2, B3, A5, f2 LFPDUX A5, AO, INC2 fxcpmadd f1, B3, A6, f1 fxcsmadd f3, B3, A6, f3 LFPDUX A6, AO, INC2 LFPDUX B3, BO, INC2 fxcpmadd f0, B4, A7, f0 fxcsmadd f2, B4, A7, f2 LFPDUX A7, AO, INC2 fxcpmadd f1, B4, A8, f1 fxcsmadd f3, B4, A8, f3 LFPDUX A8, AO, INC2 LFPDUX B4, BO, INC2 bdnz+ .L62 .align 4.L63: fxcpmadd f0, B1, A1, f0 fxcsmadd f2, B1, A1, f2 fxcpmadd f1, B1, A2, f1 fxcsmadd f3, B1, A2, f3 fxcpmadd f0, B2, A3, f0 fxcsmadd f2, B2, A3, f2 fxcpmadd f1, B2, A4, f1 fxcsmadd f3, B2, A4, f3 fxcpmadd f0, B3, A5, f0 fxcsmadd f2, B3, A5, f2 fxcpmadd f1, B3, A6, f1 fxcsmadd f3, B3, A6, f3 fxcpmadd f0, B4, A7, f0 fxcsmadd f2, B4, A7, f2 fxcpmadd f1, B4, A8, f1 fxcsmadd f3, B4, A8, f3 .align 4.L64:#if defined(LT) || defined(RN) andi. r0, KK, 3 mtspr CTR, r0 ble+ .L68#else andi. r0, TEMP, 3 mtspr CTR, r0 ble+ .L68#endif LFPDUX A1, AO, INC2 LFPDUX B1, BO, INC2 LFPDUX A2, AO, INC2 bdz- .L67 .align 4.L66: fxcpmadd f0, B1, A1, f0 fxcsmadd f2, B1, A1, f2 LFPDUX A1, AO, INC2 fxcpmadd f1, B1, A2, f1 fxcsmadd f3, B1, A2, f3 LFPDUX B1, BO, INC2 LFPDUX A2, AO, INC2 bdnz+ .L66 .align 4.L67: fxcpmadd f0, B1, A1, f0 fxcsmadd f2, B1, A1, f2 fxcpmadd f1, B1, A2, f1 fxcsmadd f3, B1, A2, f3 .align 4.L68:#if defined(LN) || defined(RT)#ifdef LN subi r0, KK, 4#else subi r0, KK, 2#endif slwi TEMP, r0, 2 + BASE_SHIFT slwi r0, r0, 1 + BASE_SHIFT add AO, AORIG, TEMP add BO, B, r0 addi BO, BO, - 2 * SIZE#endif#if defined(LN) || defined(LT) fpmr f24, f0 fpmr f25, f1 fsmfp f0, f2 fsmfp f1, f3 fsmtp f2, f24 fsmtp f3, f25 LFPDUX f16, BO, INC2 LFPDUX f17, BO, INC2 LFPDUX f18, BO, INC2 LFPDUX f19, BO, INC2 subi BO, BO, 8 * SIZE fpsub f0, f16, f0 fpsub f2, f17, f2 fpsub f1, f18, f1 fpsub f3, f19, f3#else LFPDUX f16, AO, INC2 LFPDUX f17, AO, INC2 LFPDUX f18, AO, INC2 LFPDUX f19, AO, INC2 subi AO, AO, 8 * SIZE fpsub f0, f16, f0 fpsub f1, f17, f1 fpsub f2, f18, f2 fpsub f3, f19, f3#endif#ifdef LN addi AO, AO, 18 * SIZE LFPDUX A1, AO, INCM2 LFPDUX A2, AO, INCM2 LFPDUX A3, AO, INCM2 LFPDUX A4, AO, INCM2 add AO, AO, INCM2 LFPDUX A5, AO, INCM2 add AO, AO, INCM2 LFPDUX A6, AO, INCM2 subi AO, AO, 2 * SIZE fxsmul f3, A1, f3 fxcpnmsub f1, A1, f3, f1 fxcsnmsub f2, A2, f3, f2 fxcpnmsub f0, A2, f3, f0 fxpmul f1, A3, f1 fxcsnmsub f2, A4, f1, f2 fxcpnmsub f0, A4, f1, f0 fxsmul f2, A5, f2 fxcpnmsub f0, A5, f2, f0
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -