📄 trsm_kernel_lt.s
字号:
/*
 * Single-column tail of the triangular-solve kernel: labels .LL200 to .LL999.
 *
 * NOTE(review): this is a fragment.  The routine prologue, the register
 * aliases (A, B, C, AO, BO, AORIG, C1, I, J, L, KK, TEMP1, TEMP2, a1..a4,
 * b1..b4, c01..c04, t1..t4, FZERO) and the macros LDF, STF, FADD, FSUB,
 * FMUL, FMOV, SIZE, BASE_SHIFT and EPILOGUE are defined outside this text.
 *
 * The line structure is one instruction per line with every preprocessor
 * directive on its own line; cpp only recognises a directive at the start
 * of a line.  Instruction order is significant (hand scheduled) and must
 * not be changed casually.
 *
 * Conventions that the code below relies on:
 *   - The instruction following a branch sits in its delay slot; it is
 *     indented by one extra column here.
 *   - Products are folded in one step late: an FMUL writes tN and a later
 *     FADD adds tN into cNN, so each loop is followed by a final FADD pass.
 *   - cmp sets the condition codes well before the closing bg; the
 *     instructions in between do not touch the integer condition codes.
 *   - NOTE(review): presumably exactly one of LN, LT, RN, RT is defined
 *     per build; confirm against the build rules.
 *   - NOTE(review): diagonal entries are multiplied, never divided, so the
 *     packed triangle presumably stores reciprocals; confirm against the
 *     packing routine.
 */

/* Bookkeeping that closes the code preceding .LL200 (not visible here). */
#if defined(LT) || defined(RN)
        mov     BO, B
#endif

#ifdef RN
        add     KK, 2, KK
#endif

#ifdef RT
        sub     KK, 2, KK
#endif

/* ------------------------------------------------------------------ */
/* Entry: run this section only when the low bit of N is set.          */
/* ------------------------------------------------------------------ */
.LL200:
        and     N, 1, J
        cmp     J, 0
        ble,pn  %icc, .LL999
         nop

#ifdef RT
        /* Step B back by K elements and C back by LDC. */
        sll     K, 0 + BASE_SHIFT, TEMP1
        sub     B, TEMP1, B

        sub     C, LDC, C
#endif

        mov     C, C1                   /* C1 = output cursor for this column */

#ifdef LN
        add     M, OFFSET, KK
#endif

#ifdef LT
        mov     OFFSET, KK
#endif

#if defined(LN) || defined(RT)
        mov     A, AORIG
#else
        mov     A, AO
#endif

#ifndef RT
        add     C, LDC, C
#endif

        /* I = M / 4: number of four-row tiles. */
        sra     M, 2, I
        cmp     I, 0
        ble,pn  %icc, .LL250
         nop

/* ------------------------------------------------------------------ */
/* Four-row tile, repeated I times.                                    */
/* ------------------------------------------------------------------ */
.LL221:
#if defined(LT) || defined(RN)
        /* L = KK / 4 unrolled passes; BO restarts at B. */
        sra     KK, 2, L

        mov     B, BO
        cmp     L, 0
#else

#ifdef LN
        /* Move AORIG back by one four-row panel (K * 4 elements). */
        sll     K, 2 + BASE_SHIFT, TEMP1
        sub     AORIG, TEMP1, AORIG
#endif

        /* AO = AORIG + KK * 4 elements, BO = B + KK elements. */
        sll     KK, 2 + BASE_SHIFT, TEMP1
        sll     KK, 0 + BASE_SHIFT, TEMP2

        add     AORIG, TEMP1, AO
        add     B, TEMP2, BO

        /* TEMP1 = K - KK steps remain; L = TEMP1 / 4 unrolled passes. */
        sub     K, KK, TEMP1

        sra     TEMP1, 2, L
        cmp     L, 0
#endif

        /* Preload four A and four B values; clear accumulators and temps. */
        LDF     [AO + 0 * SIZE], a1
        FMOV    FZERO, c01
        LDF     [BO + 0 * SIZE], b1
        FMOV    FZERO, t1

        LDF     [AO + 1 * SIZE], a2
        FMOV    FZERO, c02
        LDF     [BO + 1 * SIZE], b2
        FMOV    FZERO, t2

        LDF     [AO + 2 * SIZE], a3
        FMOV    FZERO, c03
        LDF     [BO + 2 * SIZE], b3
        FMOV    FZERO, t3

        LDF     [AO + 3 * SIZE], a4
        FMOV    FZERO, c04
        LDF     [BO + 3 * SIZE], b4
        FMOV    FZERO, t4

        /* Branch uses the flags from the cmp L, 0 above.  The delay slot
           issues prefetch function 2 on the output row. */
        ble,pn  %icc, .LL225
         prefetch [C1 + 4 * SIZE], 2

/* Unrolled loop: four steps per pass, 16 A values and 4 B values consumed. */
.LL222:
        /* step 0: a1..a4 times b1 */
        FADD    c01, t1, c01
        add     BO, 4 * SIZE, BO
        FMUL    a1, b1, t1
        LDF     [AO + 4 * SIZE], a1

        FADD    c02, t2, c02
        FMUL    a2, b1, t2
        LDF     [AO + 5 * SIZE], a2

        FADD    c03, t3, c03
        add     L, -1, L
        FMUL    a3, b1, t3
        LDF     [AO + 6 * SIZE], a3

        FADD    c04, t4, c04
        FMUL    a4, b1, t4
        LDF     [AO + 7 * SIZE], a4
        LDF     [BO + 0 * SIZE], b1     /* BO already advanced: next pass b1 */

        /* step 1: a1..a4 times b2 */
        FADD    c01, t1, c01
        cmp     L, 0                    /* flags consumed by bg below */
        FMUL    a1, b2, t1
        LDF     [AO + 8 * SIZE], a1

        FADD    c02, t2, c02
        FMUL    a2, b2, t2
        LDF     [AO + 9 * SIZE], a2

        FADD    c03, t3, c03
        FMUL    a3, b2, t3
        LDF     [AO + 10 * SIZE], a3

        FADD    c04, t4, c04
        FMUL    a4, b2, t4
        LDF     [AO + 11 * SIZE], a4
        LDF     [BO + 1 * SIZE], b2

        /* step 2: a1..a4 times b3 */
        FADD    c01, t1, c01
        FMUL    a1, b3, t1
        LDF     [AO + 12 * SIZE], a1

        FADD    c02, t2, c02
        FMUL    a2, b3, t2
        LDF     [AO + 13 * SIZE], a2

        FADD    c03, t3, c03
        FMUL    a3, b3, t3
        LDF     [AO + 14 * SIZE], a3

        FADD    c04, t4, c04
        FMUL    a4, b3, t4
        LDF     [AO + 15 * SIZE], a4
        LDF     [BO + 2 * SIZE], b3

        /* step 3: a1..a4 times b4; loads fetch the next pass operands */
        FADD    c01, t1, c01
        FMUL    a1, b4, t1
        LDF     [AO + 16 * SIZE], a1

        FADD    c02, t2, c02
        FMUL    a2, b4, t2
        LDF     [AO + 17 * SIZE], a2

        FADD    c03, t3, c03
        FMUL    a3, b4, t3
        LDF     [AO + 18 * SIZE], a3

        FADD    c04, t4, c04
        FMUL    a4, b4, t4
        LDF     [AO + 19 * SIZE], a4

        add     AO, 16 * SIZE, AO
        bg,pt   %icc, .LL222
         LDF    [BO + 3 * SIZE], b4

/* Remainder: the low two bits of the step count. */
.LL225:
#if defined(LT) || defined(RN)
        and     KK, 3, L
#else
        and     TEMP1, 3, L
#endif
        cmp     L, 0
        ble,a,pn %icc, .LL229
         nop

/* One step per pass: 4 A values and 1 B value consumed. */
.LL226:
        FADD    c01, t1, c01
        add     BO, 1 * SIZE, BO
        FMUL    a1, b1, t1
        LDF     [AO + 4 * SIZE], a1

        FADD    c02, t2, c02
        add     L, -1, L
        FMUL    a2, b1, t2
        LDF     [AO + 5 * SIZE], a2

        FADD    c03, t3, c03
        cmp     L, 0
        FMUL    a3, b1, t3
        LDF     [AO + 6 * SIZE], a3

        FADD    c04, t4, c04
        FMUL    a4, b1, t4
        LDF     [AO + 7 * SIZE], a4
        add     AO, 4 * SIZE, AO

        bg,pt   %icc, .LL226
         LDF    [BO + 0 * SIZE], b1

/* Fold in the last outstanding products, then solve and store. */
.LL229:
        FADD    c01, t1, c01
        FADD    c02, t2, c02
        FADD    c03, t3, c03
        FADD    c04, t4, c04

#if defined(LN) || defined(RT)
        /* Re-point AO and BO using KK - 4 (LN) or KK - 1 (RT). */
#ifdef LN
        sub     KK, 4, TEMP1
#else
        sub     KK, 1, TEMP1
#endif
        sll     TEMP1, 2 + BASE_SHIFT, TEMP2
        sll     TEMP1, 0 + BASE_SHIFT, TEMP1
        add     AORIG, TEMP2, AO
        add     B, TEMP1, BO
#endif

        /* cNN = stored value - accumulated sum.  The stored values come
           from BO for LN/LT and from AO otherwise. */
#if defined(LN) || defined(LT)
        LDF     [BO + 0 * SIZE], a1
        LDF     [BO + 1 * SIZE], a2
        LDF     [BO + 2 * SIZE], a3
        LDF     [BO + 3 * SIZE], a4

        FSUB    a1, c01, c01
        FSUB    a2, c02, c02
        FSUB    a3, c03, c03
        FSUB    a4, c04, c04
#else
        LDF     [AO + 0 * SIZE], a1
        LDF     [AO + 1 * SIZE], a2
        LDF     [AO + 2 * SIZE], a3
        LDF     [AO + 3 * SIZE], a4

        FSUB    a1, c01, c01
        FSUB    a2, c02, c02
        FSUB    a3, c03, c03
        FSUB    a4, c04, c04
#endif

#ifdef LN
        /* Backward substitution, last row first.
           AO offsets used: 15,14,13,12 then 10,9,8 then 5,4 then 0. */
        LDF     [AO + 15 * SIZE], a1
        LDF     [AO + 14 * SIZE], a2
        LDF     [AO + 13 * SIZE], a3
        LDF     [AO + 12 * SIZE], a4

        FMUL    a1, c04, c04
        FMUL    a2, c04, t1
        FSUB    c03, t1, c03
        FMUL    a3, c04, t1
        FSUB    c02, t1, c02
        FMUL    a4, c04, t1
        FSUB    c01, t1, c01

        LDF     [AO + 10 * SIZE], a1
        LDF     [AO + 9 * SIZE], a2
        LDF     [AO + 8 * SIZE], a3

        FMUL    a1, c03, c03
        FMUL    a2, c03, t1
        FSUB    c02, t1, c02
        FMUL    a3, c03, t1
        FSUB    c01, t1, c01

        LDF     [AO + 5 * SIZE], a1
        LDF     [AO + 4 * SIZE], a2

        FMUL    a1, c02, c02
        FMUL    a2, c02, t1
        FSUB    c01, t1, c01

        LDF     [AO + 0 * SIZE], a1
        FMUL    a1, c01, c01
#endif

#ifdef LT
        /* Forward substitution, first row first.
           AO offsets used: 0,1,2,3 then 5,6,7 then 10,11 then 15. */
        LDF     [AO + 0 * SIZE], a1
        LDF     [AO + 1 * SIZE], a2
        LDF     [AO + 2 * SIZE], a3
        LDF     [AO + 3 * SIZE], a4

        FMUL    a1, c01, c01
        FMUL    a2, c01, t1
        FSUB    c02, t1, c02
        FMUL    a3, c01, t1
        FSUB    c03, t1, c03
        FMUL    a4, c01, t1
        FSUB    c04, t1, c04

        LDF     [AO + 5 * SIZE], a1
        LDF     [AO + 6 * SIZE], a2
        LDF     [AO + 7 * SIZE], a3

        FMUL    a1, c02, c02
        FMUL    a2, c02, t1
        FSUB    c03, t1, c03
        FMUL    a3, c02, t1
        FSUB    c04, t1, c04

        LDF     [AO + 10 * SIZE], a1
        LDF     [AO + 11 * SIZE], a2

        FMUL    a1, c03, c03
        FMUL    a2, c03, t1
        FSUB    c04, t1, c04

        LDF     [AO + 15 * SIZE], a1
        FMUL    a1, c04, c04
#endif

#ifdef RN
        /* Single column: scale all four results by BO[0]. */
        LDF     [BO + 0 * SIZE], a1

        FMUL    a1, c01, c01
        FMUL    a1, c02, c02
        FMUL    a1, c03, c03
        FMUL    a1, c04, c04
#endif

#ifdef RT
        /* Same scaling as RN for a single column. */
        LDF     [BO + 0 * SIZE], a1

        FMUL    a1, c01, c01
        FMUL    a1, c02, c02
        FMUL    a1, c03, c03
        FMUL    a1, c04, c04
#endif

#ifdef LN
        /* LN walks the output backwards: step C1 down before storing. */
        add     C1, -4 * SIZE, C1
#endif

        /* Write the results back into the packed buffer ... */
#if defined(LN) || defined(LT)
        STF     c01, [BO + 0 * SIZE]
        STF     c02, [BO + 1 * SIZE]
        STF     c03, [BO + 2 * SIZE]
        STF     c04, [BO + 3 * SIZE]
#else
        STF     c01, [AO + 0 * SIZE]
        STF     c02, [AO + 1 * SIZE]
        STF     c03, [AO + 2 * SIZE]
        STF     c04, [AO + 3 * SIZE]
#endif

        /* ... and into the output at C1. */
        STF     c01, [C1 + 0 * SIZE]
        STF     c02, [C1 + 1 * SIZE]
        STF     c03, [C1 + 2 * SIZE]
        STF     c04, [C1 + 3 * SIZE]

        FMOV    FZERO, t1
        FMOV    FZERO, t2
        FMOV    FZERO, t3
        FMOV    FZERO, t4

#ifndef LN
        add     C1, 4 * SIZE, C1
#endif

#ifdef RT
        /* Advance AORIG by one four-row panel (K * 4 elements). */
        sll     K, 2 + BASE_SHIFT, TEMP1
        add     AORIG, TEMP1, AORIG
#endif

#if defined(LT) || defined(RN)
        /* Skip the K - KK steps that were not consumed by the loops. */
        sub     K, KK, TEMP1
        sll     TEMP1, 2 + BASE_SHIFT, TEMP2
        sll     TEMP1, 0 + BASE_SHIFT, TEMP1
        add     AO, TEMP2, AO
        add     BO, TEMP1, BO
#endif

#ifdef LT
        add     KK, 4, KK
#endif

#ifdef LN
        sub     KK, 4, KK
#endif

        add     I, -1, I
        cmp     I, 0
        bg,pt   %icc, .LL221
         nop

/* ------------------------------------------------------------------ */
/* Two-row tile: executed once when bit 1 of M is set.                 */
/* ------------------------------------------------------------------ */
.LL250:
        and     M, 2, I
        cmp     I, 0
        ble,pn  %icc, .LL270
         nop

#if defined(LT) || defined(RN)
        sra     KK, 2, L

        mov     B, BO
        cmp     L, 0
#else
#ifdef LN
        /* Move AORIG back by one two-row panel (K * 2 elements). */
        sll     K, 1 + BASE_SHIFT, TEMP1
        sub     AORIG, TEMP1, AORIG
#endif

        /* AO = AORIG + KK * 2 elements, BO = B + KK elements. */
        sll     KK, 1 + BASE_SHIFT, TEMP1
        sll     KK, 0 + BASE_SHIFT, TEMP2

        add     AORIG, TEMP1, AO
        add     B, TEMP2, BO

        sub     K, KK, TEMP1

        sra     TEMP1, 2, L
        cmp     L, 0
#endif

        /* Preload and clear.  Here a1,a2 and a3,a4 are two consecutive
           steps of the two-row panel, as the pairing with b1/b2 in the
           loop below shows. */
        LDF     [AO + 0 * SIZE], a1
        FMOV    FZERO, c01
        LDF     [BO + 0 * SIZE], b1
        FMOV    FZERO, t1

        LDF     [AO + 1 * SIZE], a2
        FMOV    FZERO, c02
        LDF     [BO + 1 * SIZE], b2
        FMOV    FZERO, t2

        LDF     [AO + 2 * SIZE], a3
        FMOV    FZERO, c03
        LDF     [BO + 2 * SIZE], b3
        FMOV    FZERO, t3

        LDF     [AO + 3 * SIZE], a4
        FMOV    FZERO, c04
        LDF     [BO + 3 * SIZE], b4
        FMOV    FZERO, t4

        ble,pn  %icc, .LL255
         nop

/* Unrolled loop: four steps per pass, 8 A values and 4 B values consumed.
   Even steps accumulate into c01/c02, odd steps into c03/c04; the two
   halves are added together at .LL259. */
.LL252:
        FADD    c01, t1, c01
        add     L, -1, L
        FMUL    a1, b1, t1
        LDF     [AO + 4 * SIZE], a1

        FADD    c02, t2, c02
        FMUL    a2, b1, t2
        LDF     [AO + 5 * SIZE], a2
        LDF     [BO + 4 * SIZE], b1

        FADD    c03, t3, c03
        cmp     L, 0                    /* flags consumed by bg below */
        FMUL    a3, b2, t3
        LDF     [AO + 6 * SIZE], a3

        FADD    c04, t4, c04
        FMUL    a4, b2, t4
        LDF     [AO + 7 * SIZE], a4
        LDF     [BO + 5 * SIZE], b2

        FADD    c01, t1, c01
        FMUL    a1, b3, t1
        LDF     [AO + 8 * SIZE], a1

        FADD    c02, t2, c02
        FMUL    a2, b3, t2
        LDF     [AO + 9 * SIZE], a2
        LDF     [BO + 6 * SIZE], b3

        FADD    c03, t3, c03
        FMUL    a3, b4, t3
        LDF     [AO + 10 * SIZE], a3

        FADD    c04, t4, c04
        FMUL    a4, b4, t4
        LDF     [AO + 11 * SIZE], a4
        add     AO, 8 * SIZE, AO
        LDF     [BO + 7 * SIZE], b4

        bg,pt   %icc, .LL252
         add    BO, 4 * SIZE, BO

/* Remainder: the low two bits of the step count. */
.LL255:
#if defined(LT) || defined(RN)
        and     KK, 3, L
#else
        and     TEMP1, 3, L
#endif
        cmp     L, 0
        ble,a,pn %icc, .LL259
         nop

/* One step per pass: 2 A values and 1 B value consumed. */
.LL256:
        FADD    c01, t1, c01
        add     L, -1, L
        FMUL    a1, b1, t1
        LDF     [AO + 2 * SIZE], a1

        FADD    c02, t2, c02
        cmp     L, 0
        FMUL    a2, b1, t2
        LDF     [AO + 3 * SIZE], a2

        LDF     [BO + 1 * SIZE], b1
        add     AO, 2 * SIZE, AO

        bg,pt   %icc, .LL256
         add    BO, 1 * SIZE, BO

/* Fold in outstanding products and merge the odd-step accumulators. */
.LL259:
        FADD    c01, t1, c01
        FADD    c02, t2, c02
        FADD    c03, t3, c03
        FADD    c04, t4, c04

        FADD    c01, c03, c01
        FADD    c02, c04, c02

#if defined(LN) || defined(RT)
        /* Re-point AO and BO using KK - 2 (LN) or KK - 1 (RT). */
#ifdef LN
        sub     KK, 2, TEMP1
#else
        sub     KK, 1, TEMP1
#endif
        sll     TEMP1, 1 + BASE_SHIFT, TEMP2
        sll     TEMP1, 0 + BASE_SHIFT, TEMP1
        add     AORIG, TEMP2, AO
        add     B, TEMP1, BO
#endif

#if defined(LN) || defined(LT)
        LDF     [BO + 0 * SIZE], a1
        LDF     [BO + 1 * SIZE], a2

        FSUB    a1, c01, c01
        FSUB    a2, c02, c02
#else
        LDF     [AO + 0 * SIZE], a1
        LDF     [AO + 1 * SIZE], a2

        FSUB    a1, c01, c01
        FSUB    a2, c02, c02
#endif

#ifdef LN
        /* Backward substitution on a 2x2 block: AO offsets 3, 2, 0. */
        LDF     [AO + 3 * SIZE], a1
        LDF     [AO + 2 * SIZE], a2
        LDF     [AO + 0 * SIZE], a3

        FMUL    a1, c02, c02
        FMUL    a2, c02, t1
        FSUB    c01, t1, c01
        FMUL    a3, c01, c01
#endif

#ifdef LT
        /* Forward substitution on a 2x2 block: AO offsets 0, 1, 3. */
        LDF     [AO + 0 * SIZE], a1
        LDF     [AO + 1 * SIZE], a2
        LDF     [AO + 3 * SIZE], a3

        FMUL    a1, c01, c01
        FMUL    a2, c01, t1
        FSUB    c02, t1, c02
        FMUL    a3, c02, c02
#endif

#ifdef RN
        LDF     [BO + 0 * SIZE], a1

        FMUL    a1, c01, c01
        FMUL    a1, c02, c02
#endif

#ifdef RT
        LDF     [BO + 0 * SIZE], a1

        FMUL    a1, c01, c01
        FMUL    a1, c02, c02
#endif

#ifdef LN
        add     C1, -2 * SIZE, C1
#endif

#if defined(LN) || defined(LT)
        STF     c01, [BO + 0 * SIZE]
        STF     c02, [BO + 1 * SIZE]
#else
        STF     c01, [AO + 0 * SIZE]
        STF     c02, [AO + 1 * SIZE]
#endif

        STF     c01, [C1 + 0 * SIZE]
        STF     c02, [C1 + 1 * SIZE]

        FMOV    FZERO, t1
        FMOV    FZERO, t2
        FMOV    FZERO, t3
        FMOV    FZERO, t4

#ifndef LN
        add     C1, 2 * SIZE, C1
#endif

#ifdef RT
        /* Advance AORIG by one two-row panel (K * 2 elements). */
        sll     K, 1 + BASE_SHIFT, TEMP1
        add     AORIG, TEMP1, AORIG
#endif

#if defined(LT) || defined(RN)
        /* Skip the K - KK steps that were not consumed by the loops. */
        sub     K, KK, TEMP1
        sll     TEMP1, 1 + BASE_SHIFT, TEMP2
        sll     TEMP1, 0 + BASE_SHIFT, TEMP1
        add     AO, TEMP2, AO
        add     BO, TEMP1, BO
#endif

#ifdef LT
        add     KK, 2, KK
#endif

#ifdef LN
        sub     KK, 2, KK
#endif

/* ------------------------------------------------------------------ */
/* One-row tile: executed once when bit 0 of M is set.                 */
/* ------------------------------------------------------------------ */
.LL270:
        and     M, 1, I
        cmp     I, 0
        ble,pn  %icc, .LL299
         nop

#if defined(LT) || defined(RN)
        sra     KK, 2, L

        mov     B, BO
        cmp     L, 0
#else
#ifdef LN
        /* Move AORIG back by one single-row panel (K elements). */
        sll     K, 0 + BASE_SHIFT, TEMP1
        sub     AORIG, TEMP1, AORIG
#endif

        /* AO and BO both advance by KK elements. */
        sll     KK, 0 + BASE_SHIFT, TEMP1

        add     AORIG, TEMP1, AO
        add     B, TEMP1, BO

        sub     K, KK, TEMP1

        sra     TEMP1, 2, L
        cmp     L, 0
#endif

        /* Preload four consecutive steps of A and B; clear c01, c02, t1..t4. */
        LDF     [AO + 0 * SIZE], a1
        FMOV    FZERO, t1
        LDF     [AO + 1 * SIZE], a2
        FMOV    FZERO, c01
        LDF     [AO + 2 * SIZE], a3
        FMOV    FZERO, t2
        LDF     [AO + 3 * SIZE], a4
        FMOV    FZERO, c02

        LDF     [BO + 0 * SIZE], b1
        FMOV    FZERO, t3
        LDF     [BO + 1 * SIZE], b2
        FMOV    FZERO, t4
        LDF     [BO + 2 * SIZE], b3
        ble,pn  %icc, .LL275
         LDF    [BO + 3 * SIZE], b4

/* Unrolled loop: four steps per pass.  t1 and t3 feed c01, t2 and t4 feed
   c02; c01 and c02 are summed at .LL279. */
.LL272:
        FADD    c01, t1, c01
        add     L, -1, L
        add     AO, 4 * SIZE, AO

        FMUL    a1, b1, t1
        add     BO, 4 * SIZE, BO
        LDF     [AO + 0 * SIZE], a1

        FADD    c02, t2, c02
        cmp     L, 0                    /* flags consumed by bg below */
        LDF     [BO + 0 * SIZE], b1

        FMUL    a2, b2, t2
        LDF     [AO + 1 * SIZE], a2

        FADD    c01, t3, c01
        LDF     [BO + 1 * SIZE], b2

        FMUL    a3, b3, t3
        LDF     [AO + 2 * SIZE], a3

        FADD    c02, t4, c02
        LDF     [BO + 2 * SIZE], b3

        FMUL    a4, b4, t4
        LDF     [AO + 3 * SIZE], a4

        bg,pt   %icc, .LL272
         LDF    [BO + 3 * SIZE], b4

/* Remainder: the low two bits of the step count. */
.LL275:
#if defined(LT) || defined(RN)
        and     KK, 3, L
#else
        and     TEMP1, 3, L
#endif
        cmp     L, 0
        ble,a,pn %icc, .LL279
         nop

/* One step per pass: 1 A value and 1 B value consumed. */
.LL276:
        FADD    c01, t1, c01
        add     L, -1, L
        FMUL    a1, b1, t1
        LDF     [AO + 1 * SIZE], a1
        LDF     [BO + 1 * SIZE], b1
        add     BO, 1 * SIZE, BO
        cmp     L, 0
        bg,pt   %icc, .LL276
         add    AO, 1 * SIZE, AO

/* Fold in every outstanding product and reduce to the single result c01. */
.LL279:
        FADD    c01, t1, c01
        FADD    c02, t2, c02
        FADD    c01, t3, c01
        FADD    c02, t4, c02

        FADD    c01, c02, c01

#if defined(LN) || defined(RT)
        /* Re-point AO and BO using KK - 1. */
        sub     KK, 1, TEMP1
        sll     TEMP1, 0 + BASE_SHIFT, TEMP1
        add     AORIG, TEMP1, AO
        add     B, TEMP1, BO
#endif

#if defined(LN) || defined(LT)
        LDF     [BO + 0 * SIZE], a1
        FSUB    a1, c01, c01
#else
        LDF     [AO + 0 * SIZE], a1
        FSUB    a1, c01, c01
#endif

        /* 1x1 solve: multiply by the single entry (AO[0] for the left
           variants, BO[0] for the right variants). */
#ifdef LN
        LDF     [AO + 0 * SIZE], a1
        FMUL    a1, c01, c01
#endif

#ifdef LT
        LDF     [AO + 0 * SIZE], a1
        FMUL    a1, c01, c01
#endif

#ifdef RN
        LDF     [BO + 0 * SIZE], a1
        FMUL    a1, c01, c01
#endif

#ifdef RT
        LDF     [BO + 0 * SIZE], a1
        FMUL    a1, c01, c01
#endif

#ifdef LN
        add     C1, -1 * SIZE, C1
#endif

#if defined(LN) || defined(LT)
        STF     c01, [BO + 0 * SIZE]
#else
        STF     c01, [AO + 0 * SIZE]
#endif

        STF     c01, [C1 + 0 * SIZE]

        FMOV    FZERO, t1
        FMOV    FZERO, t2
        FMOV    FZERO, t3
        FMOV    FZERO, t4

#ifndef LN
        add     C1, 1 * SIZE, C1
#endif

#ifdef RT
        /* Advance AORIG by one single-row panel (K elements). */
        sll     K, 0 + BASE_SHIFT, TEMP1
        add     AORIG, TEMP1, AORIG
#endif

#if defined(LT) || defined(RN)
        /* Skip the K - KK steps that were not consumed by the loops. */
        sub     K, KK, TEMP1
        sll     TEMP1, 0 + BASE_SHIFT, TEMP1
        add     AO, TEMP1, AO
        add     BO, TEMP1, BO
#endif

#ifdef LT
        add     KK, 1, KK
#endif

#ifdef LN
        sub     KK, 1, KK
#endif

/* ------------------------------------------------------------------ */
/* Column bookkeeping after all row tiles.                             */
/* ------------------------------------------------------------------ */
.LL299:
#ifdef LN
        /* Advance B past this column (K elements). */
        sll     K, 0 + BASE_SHIFT, TEMP1
        add     B, TEMP1, B
#endif

#if defined(LT) || defined(RN)
        mov     BO, B
#endif

#ifdef RN
        add     KK, 1, KK
#endif

#ifdef RT
        sub     KK, 1, KK
#endif

/* Common exit.  clr %o0 is in the delay slot of return, so it runs as part
   of the return sequence and leaves zero in %o0. */
.LL999:
        return  %i7 + 8
         clr    %o0

        EPILOGUE
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -