📄 ztrsm_kernel_ln.s
字号:
FADD1 c09, t3, c09 nop FMUL a2, b3, t3 nop FADD3 c13, t4, c13 add L, -1, L FMUL a2, b4, t4 LDF [AO - 11 * SIZE], a2 FADD2 c02, t1, c02 nop FMUL a3, b1, t1 nop FADD4 c06, t2, c06 nop FMUL a3, b2, t2 nop FADD2 c10, t3, c10 nop FMUL a3, b3, t3 nop FADD4 c14, t4, c14 nop FMUL a3, b4, t4 LDF [AO - 10 * SIZE], a3 FADD1 c03, t1, c03 nop FMUL a4, b1, t1 LDF [BO - 8 * SIZE], b1 FADD3 c07, t2, c07 nop FMUL a4, b2, t2 LDF [BO - 11 * SIZE], b2 FADD1 c11, t3, c11 nop FMUL a4, b3, t3 LDF [BO - 10 * SIZE], b3 FADD3 c15, t4, c15 nop FMUL a4, b4, t4 LDF [BO - 9 * SIZE], b4 FADD2 c04, t1, c04 nop FMUL a5, b5, t1 LDF [AO - 9 * SIZE], a4 FADD4 c08, t2, c08 nop FMUL a5, b2, t2 nop FADD2 c12, t3, c12 nop FMUL a5, b3, t3 nop FADD4 c16, t4, c16 nop FMUL a5, b4, t4 LDF [AO - 4 * SIZE], a5 FADD1 c01, t1, c01 nop FMUL a2, b5, t1 nop FADD3 c05, t2, c05 nop FMUL a2, b2, t2 nop FADD1 c09, t3, c09 nop FMUL a2, b3, t3 nop FADD3 c13, t4, c13 nop FMUL a2, b4, t4 LDF [AO - 7 * SIZE], a2 FADD2 c02, t1, c02 nop FMUL a3, b5, t1 nop FADD4 c06, t2, c06 nop FMUL a3, b2, t2 nop FADD2 c10, t3, c10 nop FMUL a3, b3, t3 nop FADD4 c14, t4, c14 nop FMUL a3, b4, t4 LDF [AO - 6 * SIZE], a3 FADD1 c03, t1, c03 nop FMUL a4, b5, t1 LDF [BO - 4 * SIZE], b5 FADD3 c07, t2, c07 nop FMUL a4, b2, t2 LDF [BO - 7 * SIZE], b2 FADD1 c11, t3, c11 nop FMUL a4, b3, t3 LDF [BO - 6 * SIZE], b3 FADD3 c15, t4, c15 nop FMUL a4, b4, t4 LDF [BO - 5 * SIZE], b4 FADD2 c04, t1, c04 nop FMUL a1, b1, t1 LDF [AO - 5 * SIZE], a4 FADD4 c08, t2, c08 nop FMUL a1, b2, t2 nop FADD2 c12, t3, c12 nop FMUL a1, b3, t3 nop FADD4 c16, t4, c16 nop FMUL a1, b4, t4 LDF [AO - 0 * SIZE], a1 FADD1 c01, t1, c01 nop FMUL a2, b1, t1 nop#ifdef DOUBLE prefetch [AO + (APREFETCHSIZE + 8) * SIZE], APREFETCH_CATEGORY#else nop#endif FADD3 c05, t2, c05 nop FMUL a2, b2, t2 FADD1 c09, t3, c09 nop FMUL a2, b3, t3 nop FADD3 c13, t4, c13 nop FMUL a2, b4, t4 nop FADD2 c02, t1, c02 nop FMUL a3, b1, t1 LDF [AO - 3 * SIZE], a2 FADD4 c06, t2, c06#ifdef DOUBLE prefetch [BO + (BPREFETCHSIZE + 8) * SIZE], BPREFETCH_CATEGORY#else nop#endif FMUL a3, b2, t2 nop FADD2 c10, t3, c10 nop FMUL a3, b3, t3 nop FADD4 c14, t4, c14 nop FMUL a3, b4, t4 LDF [AO - 2 * SIZE], a3 FADD1 c03, t1, c03 nop FMUL a4, b1, t1 LDF [BO - 0 * SIZE], b1 FADD3 c07, t2, c07 nop FMUL a4, b2, t2 LDF [BO - 3 * SIZE], b2 FADD1 c11, t3, c11 nop FMUL a4, b3, t3 LDF [BO - 2 * SIZE], b3 FADD3 c15, t4, c15 nop FMUL a4, b4, t4 LDF [BO - 1 * SIZE], b4 FADD2 c04, t1, c04 nop FMUL a5, b5, t1 LDF [AO - 1 * SIZE], a4 FADD4 c08, t2, c08 FMUL a5, b2, t2 FADD2 c12, t3, c12 FMUL a5, b3, t3 FADD4 c16, t4, c16 nop FMUL a5, b4, t4 LDF [AO + 4 * SIZE], a5 FADD1 c01, t1, c01 nop FMUL a2, b5, t1 nop FADD3 c05, t2, c05 nop FMUL a2, b2, t2 nop FADD1 c09, t3, c09 nop FMUL a2, b3, t3 nop FADD3 c13, t4, c13 nop FMUL a2, b4, t4 LDF [AO + 1 * SIZE], a2 FADD2 c02, t1, c02 nop FMUL a3, b5, t1 nop FADD4 c06, t2, c06 nop FMUL a3, b2, t2 nop FADD2 c10, t3, c10 nop FMUL a3, b3, t3 nop FADD4 c14, t4, c14 nop FMUL a3, b4, t4 LDF [AO + 2 * SIZE], a3 FADD1 c03, t1, c03 cmp L, 0 FMUL a4, b5, t1 LDF [BO + 4 * SIZE], b5 FADD3 c07, t2, c07 nop FMUL a4, b2, t2 LDF [BO + 1 * SIZE], b2 FADD1 c11, t3, c11 nop FMUL a4, b3, t3 LDF [BO + 2 * SIZE], b3 FADD3 c15, t4, c15 FMUL a4, b4, t4 bg,pt %icc, .LL22 LDF [BO + 3 * SIZE], b4.LL25:#if defined(LT) || defined(RN) and KK, 3, L#else and TEMP1, 3, L#endif cmp L, 0 ble,pn %icc, .LL29 nop .LL26: FADD2 c04, t1, c04 LDF [AO + 3 * SIZE], a4 FMUL a1, b1, t1 add AO, 4 * SIZE, AO FADD4 c08, t2, c08 add BO, 4 * SIZE, BO FMUL a1, b2, t2 add L, -1, L FADD2 c12, t3, c12 nop FMUL a1, b3, t3 cmp L, 0 FADD4 c16, t4, c16 nop FMUL a1, b4, t4 LDF [AO + 0 * SIZE], a1 FADD1 c01, t1, c01 nop FMUL a2, b1, t1 nop FADD3 c05, t2, c05 nop FMUL a2, b2, t2 nop FADD1 c09, t3, c09 nop FMUL a2, b3, t3 nop FADD3 c13, t4, c13 nop FMUL a2, b4, t4 LDF [AO + 1 * SIZE], a2 FADD2 c02, t1, c02 nop FMUL a3, b1, t1 nop FADD4 c06, t2, c06 nop FMUL a3, b2, t2 nop FADD2 c10, t3, c10 nop FMUL a3, b3, t3 nop FADD4 c14, t4, c14 nop FMUL a3, b4, t4 LDF [AO + 2 * SIZE], a3 FADD1 c03, t1, c03 nop FMUL a4, b1, t1 LDF [BO + 0 * SIZE], b1 FADD3 c07, t2, c07 nop FMUL a4, b2, t2 LDF [BO + 1 * SIZE], b2 FADD1 c11, t3, c11 nop FMUL a4, b3, t3 LDF [BO + 2 * SIZE], b3 FADD3 c15, t4, c15 FMUL a4, b4, t4 bg,pt %icc, .LL26 LDF [BO + 3 * SIZE], b4.LL29:#if defined(LN) || defined(RT) sub KK, 2, TEMP1 sll TEMP1, 1 + ZBASE_SHIFT, TEMP1 add AORIG, TEMP1, AO add B, TEMP1, BO#endif FADD2 c04, t1, c04 FADD4 c08, t2, c08 FADD2 c12, t3, c12 FADD4 c16, t4, c16 FADD c01, c06, c01 FADD c02, c05, c02 FADD c03, c08, c03 FADD c04, c07, c04 FADD c09, c14, c09 FADD c10, c13, c10 FADD c11, c16, c11 FADD c12, c15, c12#if defined(LN) || defined(LT) LDF [BO + 0 * SIZE], a1 LDF [BO + 1 * SIZE], a2 LDF [BO + 2 * SIZE], a3 LDF [BO + 3 * SIZE], a4 LDF [BO + 4 * SIZE], b1 LDF [BO + 5 * SIZE], b2 LDF [BO + 6 * SIZE], b3 LDF [BO + 7 * SIZE], b4 FSUB a1, c01, c01 FSUB a2, c02, c02 FSUB a3, c09, c09 FSUB a4, c10, c10 FSUB b1, c03, c03 FSUB b2, c04, c04 FSUB b3, c11, c11 FSUB b4, c12, c12#else LDF [AO + 0 * SIZE], a1 LDF [AO + 1 * SIZE], a2 LDF [AO + 2 * SIZE], a3 LDF [AO + 3 * SIZE], a4 LDF [AO + 4 * SIZE], b1 LDF [AO + 5 * SIZE], b2 LDF [AO + 6 * SIZE], b3 LDF [AO + 7 * SIZE], b4 FSUB a1, c01, c01 FSUB a2, c02, c02 FSUB a3, c03, c03 FSUB a4, c04, c04 FSUB b1, c09, c09 FSUB b2, c10, c10 FSUB b3, c11, c11 FSUB b4, c12, c12#endif#ifdef LN LDF [AO + 6 * SIZE], a1 LDF [AO + 7 * SIZE], a2 LDF [AO + 4 * SIZE], a3 LDF [AO + 5 * SIZE], a4 LDF [AO + 0 * SIZE], b1 LDF [AO + 1 * SIZE], b2 FMUL a1, c03, t1 FMUL a2, c04, t2 FMUL a1, c04, t3 FMUL a2, c03, t4 FMUL a1, c11, t5 FMUL a2, c12, t6 FMUL a1, c12, t7 FMUL a2, c11, t8 FADD4 t1, t2, c03 FADD2 t3, t4, c04 FADD4 t5, t6, c11 FADD2 t7, t8, c12 FMUL a3, c03, t1 FMUL a3, c04, t2 FMUL a3, c11, t3 FMUL a3, c12, t4 FMUL a4, c04, t5 FMUL a4, c03, t6 FMUL a4, c12, t7 FMUL a4, c11, t8 FSUB c01, t1, c01 FSUB c02, t2, c02 FSUB c09, t3, c09 FSUB c10, t4, c10 FADD2 c01, t5, c01 FADD4 c02, t6, c02 FADD2 c09, t7, c09 FADD4 c10, t8, c10 FMUL b1, c01, t1 FMUL b2, c02, t2 FMUL b1, c02, t3 FMUL b2, c01, t4 FMUL b1, c09, t5 FMUL b2, c10, t6 FMUL b1, c10, t7 FMUL b2, c09, t8 FADD4 t1, t2, c01 FADD2 t3, t4, c02 FADD4 t5, t6, c09 FADD2 t7, t8, c10#endif#ifdef LT LDF [AO + 0 * SIZE], a1 LDF [AO + 1 * SIZE], a2 LDF [AO + 2 * SIZE], a3 LDF [AO + 3 * SIZE], a4 LDF [AO + 6 * SIZE], b1 LDF [AO + 7 * SIZE], b2 FMUL a1, c01, t1 FMUL a2, c02, t2 FMUL a1, c02, t3 FMUL a2, c01, t4 FMUL a1, c09, t5 FMUL a2, c10, t6 FMUL a1, c10, t7 FMUL a2, c09, t8 FADD4 t1, t2, c01 FADD2 t3, t4, c02 FADD4 t5, t6, c09 FADD2 t7, t8, c10 FMUL a3, c01, t1 FMUL a3, c02, t2 FMUL a3, c09, t3 FMUL a3, c10, t4 FMUL a4, c02, t5 FMUL a4, c01, t6 FMUL a4, c10, t7 FMUL a4, c09, t8 FSUB c03, t1, c03 FSUB c04, t2, c04 FSUB c11, t3, c11 FSUB c12, t4, c12 FADD2 c03, t5, c03 FADD4 c04, t6, c04 FADD2 c11, t7, c11 FADD4 c12, t8, c12 FMUL b1, c03, t1 FMUL b2, c04, t2 FMUL b1, c04, t3 FMUL b2, c03, t4 FMUL b1, c11, t5 FMUL b2, c12, t6 FMUL b1, c12, t7 FMUL b2, c11, t8 FADD4 t1, t2, c03 FADD2 t3, t4, c04 FADD4 t5, t6, c11 FADD2 t7, t8, c12#endif#ifdef RN LDF [BO + 0 * SIZE], a1 LDF [BO + 1 * SIZE], a2 LDF [BO + 2 * SIZE], a3 LDF [BO + 3 * SIZE], a4 LDF [BO + 6 * SIZE], b1 LDF [BO + 7 * SIZE], b2 FMUL a1, c01, t1 FMUL a2, c02, t2 FMUL a1, c02, t3 FMUL a2, c01, t4 FMUL a1, c03, t5 FMUL a2, c04, t6 FMUL a1, c04, t7 FMUL a2, c03, t8 FADD4 t1, t2, c01 FADD3 t3, t4, c02 FADD4 t5, t6, c03 FADD3 t7, t8, c04 FMUL a3, c01, t1 FMUL a3, c02, t2 FMUL a3, c03, t3 FMUL a3, c04, t4 FMUL a4, c02, t5 FMUL a4, c01, t6 FMUL a4, c04, t7 FMUL a4, c03, t8 FSUB c09, t1, c09 FSUB c10, t2, c10 FSUB c11, t3, c11 FSUB c12, t4, c12 FADD3 c09, t5, c09 FADD4 c10, t6, c10 FADD3 c11, t7, c11 FADD4 c12, t8, c12 FMUL b1, c09, t1 FMUL b2, c10, t2 FMUL b1, c10, t3 FMUL b2, c09, t4 FMUL b1, c11, t5 FMUL b2, c12, t6 FMUL b1, c12, t7 FMUL b2, c11, t8 FADD4 t1, t2, c09 FADD3 t3, t4, c10 FADD4 t5, t6, c11 FADD3 t7, t8, c12#endif#ifdef RT LDF [BO + 6 * SIZE], a1 LDF [BO + 7 * SIZE], a2 LDF [BO + 4 * SIZE], a3 LDF [BO + 5 * SIZE], a4 LDF [BO + 0 * SIZE], b1 LDF [BO + 1 * SIZE], b2 FMUL a1, c09, t1 FMUL a2, c10, t2 FMUL a1, c10, t3 FMUL a2, c09, t4 FMUL a1, c11, t5 FMUL a2, c12, t6 FMUL a1, c12, t7 FMUL a2, c11, t8 FADD4 t1, t2, c09 FADD3 t3, t4, c10 FADD4 t5, t6, c11 FADD3 t7, t8, c12 FMUL a3, c09, t1 FMUL a3, c10, t2 FMUL a3, c11, t3 FMUL a3, c12, t4 FMUL a4, c10, t5 FMUL a4, c09, t6 FMUL a4, c12, t7 FMUL a4, c11, t8 FSUB c01, t1, c01 FSUB c02, t2, c02 FSUB c03, t3, c03 FSUB c04, t4, c04 FADD3 c01, t5, c01 FADD4 c02, t6, c02 FADD3 c03, t7, c03 FADD4 c04, t8, c04 FMUL b1, c01, t1 FMUL b2, c02, t2 FMUL b1, c02, t3 FMUL b2, c01, t4 FMUL b1, c03, t5 FMUL b2, c04, t6 FMUL b1, c04, t7 FMUL b2, c03, t8 FADD4 t1, t2, c01 FADD3 t3, t4, c02 FADD4 t5, t6, c03 FADD3 t7, t8, c04#endif#ifdef LN add C1, -4 * SIZE, C1 add C2, -4 * SIZE, C2#endif#if defined(LN) || defined(LT) STF c01, [BO + 0 * SIZE] STF c02, [BO + 1 * SIZE] STF c09, [BO + 2 * SIZE] STF c10, [BO + 3 * SIZE] STF c03, [BO + 4 * SIZE] STF c04, [BO + 5 * SIZE] STF c11, [BO + 6 * SIZE] STF c12, [BO + 7 * SIZE]#else STF c01, [AO + 0 * SIZE] STF c02, [AO + 1 * SIZE] STF c03, [AO + 2 * SIZE] STF c04, [AO + 3 * SIZE] STF c09, [AO + 4 * SIZE] STF c10, [AO + 5 * SIZE] STF c11, [AO + 6 * SIZE] STF c12, [AO + 7 * SIZE]#endif STF c01, [C1 + 0 * SIZE] STF c02, [C1 + 1 * SIZE] STF c03, [C1 + 2 * SIZE] STF c04, [C1 + 3 * SIZE] STF c09, [C2 + 0 * SIZE] STF c10, [C2 + 1 * SIZE] STF c11, [C2 + 2 * SIZE] STF c12, [C2 + 3 * SIZE]#ifndef LN add C1, 4 * SIZE, C1 add C2, 4 * SIZE, C2#endif#ifdef RT sll K, 1 + ZBASE_SHIFT, TEMP1 add AORIG, TEMP1, AORIG#endif#if defined(LT) || defined(RN) sub K, KK, TEMP1 sll TEMP1, 1 + ZBASE_SHIFT, TEMP1 add AO, TEMP1, AO add BO, TEMP1, BO#endif#ifdef LT add KK, 2, KK#endif#ifdef LN sub KK, 2, KK#endif add I, -1, I cmp I, 0 bg,pt %icc, .LL21 nop.LL99:#ifdef LN sll K, 1 + ZBASE_SHIFT, TEMP1 add B, TEMP1, B#endif#if defined(LT) || defined(RN) mov BO, B#endif#ifdef RN add KK, 2, KK#endif#ifdef RT sub KK, 2, KK#endif add J, -1, J cmp J, 0 bg,pt %icc, .LL11 nop.LL100: and N, 1, J cmp J, 0 ble,pn %icc, .LL999 nop#ifdef RT sll K, 0 + ZBASE_SHIFT, TEMP1 sub B, TEMP1, B sub C, LDC, C#endif mov C, C1#ifdef LN add M, OFFSET, KK#endif#ifdef LT mov OFFSET, KK#endif#if defined(LN) || defined(RT) mov A, AORIG
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -