📄 zgemv_n.s
字号:
LDF [A1 + 8 * SIZE], a1 FSUBX y7, t3, y7 FMUL a3, x1, t3 LDF [Y1 + 10 * SIZE], y3 FADDX y8, t4, y8 add Y1, 8 * SIZE, Y1 FMUL a3, x2, t4 LDF [A1 + 10 * SIZE], a3 STF y5, [Y1 - 4 * SIZE] STF y6, [Y1 - 3 * SIZE] STF y7, [Y1 - 2 * SIZE] STF y8, [Y1 - 1 * SIZE].LL26: FADD y1, t1, y1 LDF [Y1 + 3 * SIZE], y4 FMUL a2, x2, t1 FADD y2, t2, y2 FMUL a2, x1, t2 FADD y3, t3, y3 LDF [Y1 + 4 * SIZE], y5 FMUL a4, x2, t3 FADD y4, t4, y4 FMUL a4, x1, t4 FSUBX y1, t1, y1 LDF [Y1 + 5 * SIZE], y6 FMUL a5, x3, t1 FADDX y2, t2, y2 FMUL a5, x4, t2 FSUBX y3, t3, y3 LDF [Y1 + 6 * SIZE], y7 FADDX y4, t4, y4 FMUL a7, x4, t4 FADD y1, t1, y1 LDF [Y1 + 7 * SIZE], y8 FMUL a7, x3, t3 FMUL a6, x4, t1 FADD y2, t2, y2 FMUL a6, x3, t2 FADD y3, t3, y3 FMUL a8, x4, t3 FADD y4, t4, y4 FMUL a8, x3, t4 FSUBX y1, t1, y1 FMUL a9, x1, t1 FADDX y2, t2, y2 FMUL a9, x2, t2 FSUBX y3, t3, y3 FMUL a11, x1, t3 FADDX y4, t4, y4 FMUL a11, x2, t4 FADD y5, t1, y5 FMUL a10, x2, t1 FADD y6, t2, y6 FMUL a10, x1, t2 FADD y7, t3, y7 FMUL a12, x2, t3 FADD y8, t4, y8 FMUL a12, x1, t4 FSUBX y5, t1, y5 FMUL a13, x3, t1 FADDX y6, t2, y6 FMUL a13, x4, t2 FSUBX y7, t3, y7 FMUL a15, x3, t3 FADDX y8, t4, y8 FMUL a15, x4, t4 FADD y5, t1, y5 FMUL a14, x4, t1 FADD y6, t2, y6 FMUL a14, x3, t2 FADD y7, t3, y7 FMUL a16, x4, t3 FADD y8, t4, y8 FMUL a16, x3, t4 STF y1, [Y1 + 0 * SIZE] FSUBX y5, t1, y5 STF y2, [Y1 + 1 * SIZE] FADDX y6, t2, y6 STF y3, [Y1 + 2 * SIZE] FSUBX y7, t3, y7 STF y4, [Y1 + 3 * SIZE] FADDX y8, t4, y8 STF y5, [Y1 + 4 * SIZE] add A1, 8 * SIZE, A1 STF y6, [Y1 + 5 * SIZE] add A2, 8 * SIZE, A2 STF y7, [Y1 + 6 * SIZE] STF y8, [Y1 + 7 * SIZE] add Y1, 8 * SIZE, Y1.LL27: andcc M, 2, I ble,pn %icc, .LL28 nop LDF [A1 + 0 * SIZE], a1 LDF [A1 + 1 * SIZE], a2 LDF [A1 + 2 * SIZE], a3 LDF [A1 + 3 * SIZE], a4 LDF [Y1 + 0 * SIZE], y1 LDF [Y1 + 1 * SIZE], y2 LDF [Y1 + 2 * SIZE], y3 LDF [Y1 + 3 * SIZE], y4 FMUL a1, x1, t1 LDF [A2 + 0 * SIZE], a5 FMUL a1, x2, t2 LDF [A2 + 1 * SIZE], a6 FMUL a3, x1, t3 LDF [A2 + 2 * SIZE], a7 FMUL a3, x2, t4 LDF [A2 + 3 * SIZE], a8 FADD y1, t1, y1 FMUL a2, x2, t1 FADD y2, t2, y2 FMUL a2, x1, t2 FADD y3, t3, y3 FMUL a4, x2, t3 FADD y4, t4, y4 FMUL a4, x1, t4 FSUBX y1, t1, y1 FMUL a5, x3, t1 FADDX y2, t2, y2 FMUL a5, x4, t2 FSUBX y3, t3, y3 FMUL a7, x3, t3 FADDX y4, t4, y4 FMUL a7, x4, t4 FADD y1, t1, y1 FMUL a6, x4, t1 FADD y2, t2, y2 FMUL a6, x3, t2 FADD y3, t3, y3 FMUL a8, x4, t3 FADD y4, t4, y4 FMUL a8, x3, t4 FSUBX y1, t1, y1 FADDX y2, t2, y2 FSUBX y3, t3, y3 FADDX y4, t4, y4 STF y1, [Y1 + 0 * SIZE] add A1, 4 * SIZE, A1 STF y2, [Y1 + 1 * SIZE] add A2, 4 * SIZE, A2 STF y3, [Y1 + 2 * SIZE] nop STF y4, [Y1 + 3 * SIZE] add Y1, 4 * SIZE, Y1.LL28: andcc M, 1, I ble,pn %icc, .LL29 nop LDF [A1 + 0 * SIZE], a1 LDF [A1 + 1 * SIZE], a2 LDF [A2 + 0 * SIZE], a3 LDF [A2 + 1 * SIZE], a4 LDF [Y1 + 0 * SIZE], y1 LDF [Y1 + 1 * SIZE], y2 FMUL a1, x1, t1 FMUL a1, x2, t2 FMUL a2, x2, t3 FMUL a2, x1, t4 FADD y1, t1, y1 FMUL a3, x3, t1 FADD y2, t2, y2 FMUL a3, x4, t2 FSUBX y1, t3, y1 FMUL a4, x4, t3 FADDX y2, t4, y2 FMUL a4, x3, t4 FADD y1, t1, y1 FADD y2, t2, y2 FSUBX y1, t3, y1 FADDX y2, t4, y2 STF y1, [Y1 + 0 * SIZE] STF y2, [Y1 + 1 * SIZE].LL29: deccc J bg %icc, .LL21 nop.LL30: andcc N, 1, J ble,pn %icc, .LL990 nop.LL31: mov YY, Y1 mov A, A1 LDF STACK_ALPHA_R, ALPHA_R LDF STACK_ALPHA_I, ALPHA_I LDF [X + 0 * SIZE], x1 LDF [X + 1 * SIZE], x2 FMUL ALPHA_R, x1, a1 /* AC */ FMUL ALPHA_I, x1, a2 /* AD */ FMUL ALPHA_R, x2, a3 /* BC */ FMUL ALPHA_I, x2, a4 /* BD */#ifndef XCONJ FSUB a1, a4, x1 FADD a2, a3, x2#else FADD a1, a4, x1 FSUB a2, a3, x2#endif sra M, 2, I cmp I, 0 ble,pn %icc, .LL37 nop LDF [A1 + 0 * SIZE], a1 LDF [A1 + 1 * SIZE], a2 LDF [A1 + 2 * SIZE], a3 LDF [A1 + 3 * SIZE], a4 LDF [A1 + 4 * SIZE], a9 LDF [A1 + 5 * SIZE], a10 LDF [A1 + 6 * SIZE], a11 LDF [A1 + 7 * SIZE], a12 LDF [Y1 + 0 * SIZE], y1 LDF [Y1 + 1 * SIZE], y2 LDF [Y1 + 2 * SIZE], y3 LDF [Y1 + 3 * SIZE], y4 LDF [Y1 + 4 * SIZE], y5 LDF [Y1 + 5 * SIZE], y6 LDF [Y1 + 6 * SIZE], y7 LDF [Y1 + 7 * SIZE], y8 FMUL a1, x1, t1 deccc I FMUL a1, x2, t2 LDF [A1 + 8 * SIZE], a1 FMUL a3, x1, t3 FMUL a3, x2, t4 ble,pn %icc, .LL33 LDF [A1 + 10 * SIZE], a3.LL32: FADD y1, t1, y1 prefetch [A1 + PREFETCHSIZE * SIZE], 1 FMUL a2, x2, t1 FADD y2, t2, y2 FMUL a2, x1, t2 LDF [A1 + 9 * SIZE], a2 FADD y3, t3, y3 FMUL a4, x2, t3 FADD y4, t4, y4 FMUL a4, x1, t4 LDF [A1 + 11 * SIZE], a4 FSUBX y1, t1, y1 FMUL a9, x1, t1 FADDX y2, t2, y2 FMUL a9, x2, t2 LDF [A1 + 12 * SIZE], a9 FSUBX y3, t3, y3 FMUL a11, x1, t3 FADDX y4, t4, y4 FMUL a11, x2, t4 LDF [A1 + 14 * SIZE], a11 STF y1, [Y1 + 0 * SIZE] STF y2, [Y1 + 1 * SIZE] STF y3, [Y1 + 2 * SIZE] STF y4, [Y1 + 3 * SIZE] FADD y5, t1, y5 FMUL a10, x2, t1 LDF [Y1 + 8 * SIZE], y1 FADD y6, t2, y6 FMUL a10, x1, t2 LDF [A1 + 13 * SIZE], a10 FADD y7, t3, y7 deccc I FMUL a12, x2, t3 LDF [Y1 + 9 * SIZE], y2 FADD y8, t4, y8 FMUL a12, x1, t4 LDF [A1 + 15 * SIZE], a12 FSUBX y5, t1, y5 add A1, 8 * SIZE, A1 FMUL a1, x1, t1 LDF [Y1 + 10 * SIZE], y3 FADDX y6, t2, y6 FMUL a1, x2, t2 LDF [A1 + 8 * SIZE], a1 FSUBX y7, t3, y7 FMUL a3, x1, t3 LDF [Y1 + 11 * SIZE], y4 FADDX y8, t4, y8 FMUL a3, x2, t4 LDF [A1 + 10 * SIZE], a3 STF y5, [Y1 + 4 * SIZE] STF y6, [Y1 + 5 * SIZE] STF y7, [Y1 + 6 * SIZE] STF y8, [Y1 + 7 * SIZE] LDF [Y1 + 12 * SIZE], y5 LDF [Y1 + 13 * SIZE], y6 LDF [Y1 + 14 * SIZE], y7 add Y1, 8 * SIZE, Y1 bg,pn %icc, .LL32 LDF [Y1 + 7 * SIZE], y8.LL33: FADD y1, t1, y1 FMUL a2, x2, t1 FADD y2, t2, y2 FMUL a2, x1, t2 FADD y3, t3, y3 FMUL a4, x2, t3 FADD y4, t4, y4 FMUL a4, x1, t4 FSUBX y1, t1, y1 FMUL a9, x1, t1 FADDX y2, t2, y2 FMUL a9, x2, t2 FSUBX y3, t3, y3 FMUL a11, x1, t3 FADDX y4, t4, y4 FMUL a11, x2, t4 FADD y5, t1, y5 FMUL a10, x2, t1 FADD y6, t2, y6 FMUL a10, x1, t2 FADD y7, t3, y7 FMUL a12, x2, t3 FADD y8, t4, y8 FMUL a12, x1, t4 FSUBX y5, t1, y5 FADDX y6, t2, y6 FSUBX y7, t3, y7 FADDX y8, t4, y8 STF y1, [Y1 + 0 * SIZE] STF y2, [Y1 + 1 * SIZE] STF y3, [Y1 + 2 * SIZE] STF y4, [Y1 + 3 * SIZE] STF y5, [Y1 + 4 * SIZE] STF y6, [Y1 + 5 * SIZE] STF y7, [Y1 + 6 * SIZE] STF y8, [Y1 + 7 * SIZE] add A1, 8 * SIZE, A1 add Y1, 8 * SIZE, Y1.LL37: andcc M, 2, I ble,pn %icc, .LL38 nop LDF [A1 + 0 * SIZE], a1 LDF [A1 + 1 * SIZE], a2 LDF [A1 + 2 * SIZE], a3 LDF [A1 + 3 * SIZE], a4 LDF [Y1 + 0 * SIZE], y1 FMUL a1, x1, t1 LDF [Y1 + 1 * SIZE], y2 FMUL a1, x2, t2 LDF [Y1 + 2 * SIZE], y3 FMUL a3, x1, t3 LDF [Y1 + 3 * SIZE], y4 FMUL a3, x2, t4 FADD y1, t1, y1 FMUL a2, x2, t1 FADD y2, t2, y2 FMUL a2, x1, t2 FADD y3, t3, y3 FMUL a4, x2, t3 FADD y4, t4, y4 FMUL a4, x1, t4 FSUBX y1, t1, y1 FADDX y2, t2, y2 FSUBX y3, t3, y3 FADDX y4, t4, y4 STF y1, [Y1 + 0 * SIZE] STF y2, [Y1 + 1 * SIZE] STF y3, [Y1 + 2 * SIZE] STF y4, [Y1 + 3 * SIZE] add A1, 4 * SIZE, A1 add Y1, 4 * SIZE, Y1.LL38: andcc M, 1, I ble,pn %icc, .LL990 nop LDF [A1 + 0 * SIZE], a1 LDF [A1 + 1 * SIZE], a2 LDF [Y1 + 0 * SIZE], y1 LDF [Y1 + 1 * SIZE], y2 FMUL a1, x1, t1 FMUL a1, x2, t2 FMUL a2, x2, t3 FMUL a2, x1, t4 FADD y1, t1, y1 FADD y2, t2, y2 FSUBX y1, t3, y1 FADDX y2, t4, y2 STF y1, [Y1 + 0 * SIZE] STF y2, [Y1 + 1 * SIZE].LL990: cmp INCY, 2 * SIZE be %icc, .LL999 mov Y, Y1 sra M, 2, I cmp I, 0 ble,pn %icc, .LL995 nop.LL991: LDF [BUFFER + 0 * SIZE], a1 LDF [BUFFER + 1 * SIZE], a2 LDF [Y + 0 * SIZE], y1 LDF [Y + 1 * SIZE], y2 add Y, INCY, Y LDF [BUFFER + 2 * SIZE], a3 LDF [BUFFER + 3 * SIZE], a4 LDF [Y + 0 * SIZE], y3 LDF [Y + 1 * SIZE], y4 add Y, INCY, Y LDF [BUFFER + 4 * SIZE], a5 LDF [BUFFER + 5 * SIZE], a6 LDF [Y + 0 * SIZE], y5 LDF [Y + 1 * SIZE], y6 add Y, INCY, Y LDF [BUFFER + 6 * SIZE], a7 LDF [BUFFER + 7 * SIZE], a8 LDF [Y + 0 * SIZE], y7 LDF [Y + 1 * SIZE], y8 add Y, INCY, Y FADD y1, a1, y1 FADD y2, a2, y2 FADD y3, a3, y3 FADD y4, a4, y4 FADD y5, a5, y5 FADD y6, a6, y6 FADD y7, a7, y7 FADD y8, a8, y8 STF y1, [Y1 + 0 * SIZE] STF y2, [Y1 + 1 * SIZE] add Y1, INCY, Y1 STF y3, [Y1 + 0 * SIZE] STF y4, [Y1 + 1 * SIZE] add Y1, INCY, Y1 STF y5, [Y1 + 0 * SIZE] STF y6, [Y1 + 1 * SIZE] add Y1, INCY, Y1 STF y7, [Y1 + 0 * SIZE] STF y8, [Y1 + 1 * SIZE] add Y1, INCY, Y1 deccc I bg,pn %icc, .LL991 add BUFFER, 8 * SIZE, BUFFER.LL995: andcc M, 2, I ble,pn %icc, .LL996 nop LDF [BUFFER + 0 * SIZE], a1 LDF [BUFFER + 1 * SIZE], a2 LDF [Y + 0 * SIZE], y1 LDF [Y + 1 * SIZE], y2 add Y, INCY, Y LDF [BUFFER + 2 * SIZE], a3 LDF [BUFFER + 3 * SIZE], a4 LDF [Y + 0 * SIZE], y3 LDF [Y + 1 * SIZE], y4 add Y, INCY, Y FADD y1, a1, y1 FADD y2, a2, y2 FADD y3, a3, y3 FADD y4, a4, y4 STF y1, [Y1 + 0 * SIZE] STF y2, [Y1 + 1 * SIZE] add Y1, INCY, Y1 STF y3, [Y1 + 0 * SIZE] STF y4, [Y1 + 1 * SIZE] add Y1, INCY, Y1 add BUFFER, 4 * SIZE, BUFFER .LL996: andcc M, 1, I ble,pn %icc, .LL999 nop LDF [BUFFER + 0 * SIZE], a1 LDF [BUFFER + 1 * SIZE], a2 LDF [Y + 0 * SIZE], y1 LDF [Y + 1 * SIZE], y2 FADD y1, a1, y1 FADD y2, a2, y2 STF y1, [Y1 + 0 * SIZE] STF y2, [Y1 + 1 * SIZE].LL999: return %i7 + 8 clr %o0 EPILOGUE
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -