📄 gemv_n.s
字号:
LFD a4, 11 * SIZE(AO6) FMADD y05, alpha6, a5, y05 FMADD y06, alpha6, a6, y06 FMADD y07, alpha6, a7, y07 FMADD y08, alpha6, a8, y08 LFD a5, 12 * SIZE(AO6) LFD a6, 13 * SIZE(AO6) LFD a7, 14 * SIZE(AO6) LFD a8, 15 * SIZE(AO6) addi AO6, AO6, 16 * SIZE nop nop PREFETCH_A6 FMADD y09, alpha6, a1, y09 FMADD y10, alpha6, a2, y10 FMADD y11, alpha6, a3, y11 FMADD y12, alpha6, a4, y12 LFD a1, 0 * SIZE(AO7) LFD a2, 1 * SIZE(AO7) LFD a3, 2 * SIZE(AO7) LFD a4, 3 * SIZE(AO7) FMADD y13, alpha6, a5, y13 FMADD y14, alpha6, a6, y14 FMADD y15, alpha6, a7, y15 FMADD y16, alpha6, a8, y16 LFD a5, 4 * SIZE(AO7) LFD a6, 5 * SIZE(AO7) LFD a7, 6 * SIZE(AO7) LFD a8, 7 * SIZE(AO7) FMADD y01, alpha7, a1, y01 FMADD y02, alpha7, a2, y02 FMADD y03, alpha7, a3, y03 FMADD y04, alpha7, a4, y04 LFD a1, 8 * SIZE(AO7) LFD a2, 9 * SIZE(AO7) LFD a3, 10 * SIZE(AO7) LFD a4, 11 * SIZE(AO7) FMADD y05, alpha7, a5, y05 FMADD y06, alpha7, a6, y06 FMADD y07, alpha7, a7, y07 FMADD y08, alpha7, a8, y08 LFD a5, 12 * SIZE(AO7) LFD a6, 13 * SIZE(AO7) LFD a7, 14 * SIZE(AO7) LFD a8, 15 * SIZE(AO7) addi AO7, AO7, 16 * SIZE nop nop PREFETCH_A7 FMADD y09, alpha7, a1, y09 FMADD y10, alpha7, a2, y10 FMADD y11, alpha7, a3, y11 FMADD y12, alpha7, a4, y12 LFD a1, 0 * SIZE(AO8) LFD a2, 1 * SIZE(AO8) LFD a3, 2 * SIZE(AO8) LFD a4, 3 * SIZE(AO8) FMADD y13, alpha7, a5, y13 FMADD y14, alpha7, a6, y14 FMADD y15, alpha7, a7, y15 FMADD y16, alpha7, a8, y16 LFD a5, 4 * SIZE(AO8) LFD a6, 5 * SIZE(AO8) LFD a7, 6 * SIZE(AO8) LFD a8, 7 * SIZE(AO8) FMADD y01, alpha8, a1, y01 FMADD y02, alpha8, a2, y02 FMADD y03, alpha8, a3, y03 FMADD y04, alpha8, a4, y04 LFD a1, 8 * SIZE(AO8) LFD a2, 9 * SIZE(AO8) LFD a3, 10 * SIZE(AO8) LFD a4, 11 * SIZE(AO8) FMADD y05, alpha8, a5, y05 FMADD y06, alpha8, a6, y06 FMADD y07, alpha8, a7, y07 FMADD y08, alpha8, a8, y08 LFD a5, 12 * SIZE(AO8) LFD a6, 13 * SIZE(AO8) LFD a7, 14 * SIZE(AO8) LFD a8, 15 * SIZE(AO8) addi AO8, AO8, 16 * SIZE nop nop PREFETCH_A8 FMADD y09, alpha8, a1, y09 FMADD y10, alpha8, a2, y10 FMADD y11, alpha8, a3, y11 FMADD y12, alpha8, a4, y12 LFD a1, 0 * SIZE(AO1) LFD a2, 1 * SIZE(AO1) LFD a3, 2 * SIZE(AO1) LFD a4, 3 * SIZE(AO1) FMADD y13, alpha8, a5, y13 FMADD y14, alpha8, a6, y14 FMADD y15, alpha8, a7, y15 FMADD y16, alpha8, a8, y16 LFD a5, 4 * SIZE(AO1) LFD a6, 5 * SIZE(AO1) LFD a7, 6 * SIZE(AO1) LFD a8, 7 * SIZE(AO1) STFD y01, 0 * SIZE(Y1) STFD y02, 1 * SIZE(Y1) STFD y03, 2 * SIZE(Y1) STFD y04, 3 * SIZE(Y1) LFD y01, 16 * SIZE(Y1) LFD y02, 17 * SIZE(Y1) LFD y03, 18 * SIZE(Y1) LFD y04, 19 * SIZE(Y1) PREFETCH_Y bdz LL(13) .align 4LL(12): FMADD y01, alpha1, a1, y01 FMADD y02, alpha1, a2, y02 FMADD y03, alpha1, a3, y03 FMADD y04, alpha1, a4, y04 LFD a1, 8 * SIZE(AO1) LFD a2, 9 * SIZE(AO1) LFD a3, 10 * SIZE(AO1) LFD a4, 11 * SIZE(AO1) STFD y05, 4 * SIZE(Y1) STFD y06, 5 * SIZE(Y1) STFD y07, 6 * SIZE(Y1) STFD y08, 7 * SIZE(Y1) LFD y05, 20 * SIZE(Y1) LFD y06, 21 * SIZE(Y1) LFD y07, 22 * SIZE(Y1) LFD y08, 23 * SIZE(Y1) FMADD y05, alpha1, a5, y05 FMADD y06, alpha1, a6, y06 FMADD y07, alpha1, a7, y07 FMADD y08, alpha1, a8, y08 LFD a5, 12 * SIZE(AO1) LFD a6, 13 * SIZE(AO1) LFD a7, 14 * SIZE(AO1) LFD a8, 15 * SIZE(AO1) STFD y09, 8 * SIZE(Y1) STFD y10, 9 * SIZE(Y1) STFD y11, 10 * SIZE(Y1) STFD y12, 11 * SIZE(Y1) LFD y09, 24 * SIZE(Y1) LFD y10, 25 * SIZE(Y1) LFD y11, 26 * SIZE(Y1) LFD y12, 27 * SIZE(Y1) FMADD y09, alpha1, a1, y09 FMADD y10, alpha1, a2, y10 FMADD y11, alpha1, a3, y11 FMADD y12, alpha1, a4, y12 LFD a1, 0 * SIZE(AO2) LFD a2, 1 * SIZE(AO2) LFD a3, 2 * SIZE(AO2) LFD a4, 3 * SIZE(AO2) STFD y13, 12 * SIZE(Y1) STFD y14, 13 * SIZE(Y1) STFD y15, 14 * SIZE(Y1) STFD y16, 15 * SIZE(Y1) LFD y13, 28 * SIZE(Y1) LFD y14, 29 * SIZE(Y1) LFD y15, 30 * SIZE(Y1) LFD y16, 31 * SIZE(Y1) FMADD y13, alpha1, a5, y13 FMADD y14, alpha1, a6, y14 FMADD y15, alpha1, a7, y15 FMADD y16, alpha1, a8, y16 LFD a5, 4 * SIZE(AO2) LFD a6, 5 * SIZE(AO2) LFD a7, 6 * SIZE(AO2) LFD a8, 7 * SIZE(AO2) FMADD y01, alpha2, a1, y01 FMADD y02, alpha2, a2, y02 FMADD y03, alpha2, a3, y03 FMADD y04, alpha2, a4, y04 LFD a1, 8 * SIZE(AO2) LFD a2, 9 * SIZE(AO2) LFD a3, 10 * SIZE(AO2) LFD a4, 11 * SIZE(AO2) FMADD y05, alpha2, a5, y05 FMADD y06, alpha2, a6, y06 FMADD y07, alpha2, a7, y07 FMADD y08, alpha2, a8, y08 LFD a5, 12 * SIZE(AO2) LFD a6, 13 * SIZE(AO2) LFD a7, 14 * SIZE(AO2) LFD a8, 15 * SIZE(AO2) FMADD y09, alpha2, a1, y09 FMADD y10, alpha2, a2, y10 FMADD y11, alpha2, a3, y11 FMADD y12, alpha2, a4, y12 LFD a1, 0 * SIZE(AO3) LFD a2, 1 * SIZE(AO3) LFD a3, 2 * SIZE(AO3) LFD a4, 3 * SIZE(AO3) FMADD y13, alpha2, a5, y13 FMADD y14, alpha2, a6, y14 FMADD y15, alpha2, a7, y15 FMADD y16, alpha2, a8, y16 LFD a5, 4 * SIZE(AO3) LFD a6, 5 * SIZE(AO3) LFD a7, 6 * SIZE(AO3) LFD a8, 7 * SIZE(AO3) FMADD y01, alpha3, a1, y01 FMADD y02, alpha3, a2, y02 FMADD y03, alpha3, a3, y03 FMADD y04, alpha3, a4, y04 LFD a1, 8 * SIZE(AO3) LFD a2, 9 * SIZE(AO3) LFD a3, 10 * SIZE(AO3) LFD a4, 11 * SIZE(AO3) FMADD y05, alpha3, a5, y05 FMADD y06, alpha3, a6, y06 FMADD y07, alpha3, a7, y07 FMADD y08, alpha3, a8, y08 LFD a5, 12 * SIZE(AO3) LFD a6, 13 * SIZE(AO3) LFD a7, 14 * SIZE(AO3) LFD a8, 15 * SIZE(AO3) FMADD y09, alpha3, a1, y09 FMADD y10, alpha3, a2, y10 FMADD y11, alpha3, a3, y11 FMADD y12, alpha3, a4, y12 LFD a1, 0 * SIZE(AO4) LFD a2, 1 * SIZE(AO4) LFD a3, 2 * SIZE(AO4) LFD a4, 3 * SIZE(AO4) FMADD y13, alpha3, a5, y13 FMADD y14, alpha3, a6, y14 FMADD y15, alpha3, a7, y15 FMADD y16, alpha3, a8, y16 LFD a5, 4 * SIZE(AO4) LFD a6, 5 * SIZE(AO4) LFD a7, 6 * SIZE(AO4) LFD a8, 7 * SIZE(AO4) FMADD y01, alpha4, a1, y01 FMADD y02, alpha4, a2, y02 FMADD y03, alpha4, a3, y03 FMADD y04, alpha4, a4, y04 LFD a1, 8 * SIZE(AO4) LFD a2, 9 * SIZE(AO4) LFD a3, 10 * SIZE(AO4) LFD a4, 11 * SIZE(AO4) FMADD y05, alpha4, a5, y05 FMADD y06, alpha4, a6, y06 FMADD y07, alpha4, a7, y07 FMADD y08, alpha4, a8, y08 LFD a5, 12 * SIZE(AO4) LFD a6, 13 * SIZE(AO4) LFD a7, 14 * SIZE(AO4) LFD a8, 15 * SIZE(AO4) addi AO1, AO1, 16 * SIZE addi AO2, AO2, 16 * SIZE addi AO3, AO3, 16 * SIZE addi AO4, AO4, 16 * SIZE PREFETCH_A1 PREFETCH_A2 PREFETCH_A3 PREFETCH_A4 FMADD y09, alpha4, a1, y09 FMADD y10, alpha4, a2, y10 FMADD y11, alpha4, a3, y11 FMADD y12, alpha4, a4, y12 LFD a1, 0 * SIZE(AO5) LFD a2, 1 * SIZE(AO5) LFD a3, 2 * SIZE(AO5) LFD a4, 3 * SIZE(AO5) FMADD y13, alpha4, a5, y13 FMADD y14, alpha4, a6, y14 FMADD y15, alpha4, a7, y15 FMADD y16, alpha4, a8, y16 LFD a5, 4 * SIZE(AO5) LFD a6, 5 * SIZE(AO5) LFD a7, 6 * SIZE(AO5) LFD a8, 7 * SIZE(AO5) FMADD y01, alpha5, a1, y01 FMADD y02, alpha5, a2, y02 FMADD y03, alpha5, a3, y03 FMADD y04, alpha5, a4, y04 LFD a1, 8 * SIZE(AO5) LFD a2, 9 * SIZE(AO5) LFD a3, 10 * SIZE(AO5) LFD a4, 11 * SIZE(AO5) FMADD y05, alpha5, a5, y05 FMADD y06, alpha5, a6, y06 FMADD y07, alpha5, a7, y07 FMADD y08, alpha5, a8, y08 LFD a5, 12 * SIZE(AO5) LFD a6, 13 * SIZE(AO5) LFD a7, 14 * SIZE(AO5) LFD a8, 15 * SIZE(AO5) FMADD y09, alpha5, a1, y09 FMADD y10, alpha5, a2, y10 FMADD y11, alpha5, a3, y11 FMADD y12, alpha5, a4, y12 LFD a1, 0 * SIZE(AO6) LFD a2, 1 * SIZE(AO6) LFD a3, 2 * SIZE(AO6) LFD a4, 3 * SIZE(AO6) FMADD y13, alpha5, a5, y13 FMADD y14, alpha5, a6, y14 FMADD y15, alpha5, a7, y15 FMADD y16, alpha5, a8, y16 LFD a5, 4 * SIZE(AO6) LFD a6, 5 * SIZE(AO6) LFD a7, 6 * SIZE(AO6) LFD a8, 7 * SIZE(AO6) FMADD y01, alpha6, a1, y01 FMADD y02, alpha6, a2, y02 FMADD y03, alpha6, a3, y03 FMADD y04, alpha6, a4, y04 LFD a1, 8 * SIZE(AO6) LFD a2, 9 * SIZE(AO6) LFD a3, 10 * SIZE(AO6) LFD a4, 11 * SIZE(AO6) FMADD y05, alpha6, a5, y05 FMADD y06, alpha6, a6, y06 FMADD y07, alpha6, a7, y07 FMADD y08, alpha6, a8, y08 LFD a5, 12 * SIZE(AO6) LFD a6, 13 * SIZE(AO6) LFD a7, 14 * SIZE(AO6) LFD a8, 15 * SIZE(AO6) FMADD y09, alpha6, a1, y09 FMADD y10, alpha6, a2, y10 FMADD y11, alpha6, a3, y11 FMADD y12, alpha6, a4, y12 LFD a1, 0 * SIZE(AO7) LFD a2, 1 * SIZE(AO7) LFD a3, 2 * SIZE(AO7) LFD a4, 3 * SIZE(AO7) FMADD y13, alpha6, a5, y13 FMADD y14, alpha6, a6, y14 FMADD y15, alpha6, a7, y15 FMADD y16, alpha6, a8, y16 LFD a5, 4 * SIZE(AO7) LFD a6, 5 * SIZE(AO7) LFD a7, 6 * SIZE(AO7) LFD a8, 7 * SIZE(AO7) FMADD y01, alpha7, a1, y01 FMADD y02, alpha7, a2, y02 FMADD y03, alpha7, a3, y03 FMADD y04, alpha7, a4, y04 LFD a1, 8 * SIZE(AO7) LFD a2, 9 * SIZE(AO7) LFD a3, 10 * SIZE(AO7) LFD a4, 11 * SIZE(AO7) FMADD y05, alpha7, a5, y05 FMADD y06, alpha7, a6, y06 FMADD y07, alpha7, a7, y07 FMADD y08, alpha7, a8, y08 LFD a5, 12 * SIZE(AO7) LFD a6, 13 * SIZE(AO7) LFD a7, 14 * SIZE(AO7) LFD a8, 15 * SIZE(AO7) FMADD y09, alpha7, a1, y09 FMADD y10, alpha7, a2, y10 FMADD y11, alpha7, a3, y11 FMADD y12, alpha7, a4, y12 LFD a1, 0 * SIZE(AO8) LFD a2, 1 * SIZE(AO8) LFD a3, 2 * SIZE(AO8) LFD a4, 3 * SIZE(AO8) FMADD y13, alpha7, a5, y13 FMADD y14, alpha7, a6, y14 FMADD y15, alpha7, a7, y15 FMADD y16, alpha7, a8, y16 LFD a5, 4 * SIZE(AO8) LFD a6, 5 * SIZE(AO8) LFD a7, 6 * SIZE(AO8) LFD a8, 7 * SIZE(AO8) FMADD y01, alpha8, a1, y01 FMADD y02, alpha8, a2, y02 FMADD y03, alpha8, a3, y03 FMADD y04, alpha8, a4, y04 LFD a1, 8 * SIZE(AO8) LFD a2, 9 * SIZE(AO8) LFD a3, 10 * SIZE(AO8) LFD a4, 11 * SIZE(AO8) FMADD y05, alpha8, a5, y05 FMADD y06, alpha8, a6, y06 FMADD y07, alpha8, a7, y07 FMADD y08, alpha8, a8, y08 LFD a5, 12 * SIZE(AO8) LFD a6, 13 * SIZE(AO8) LFD a7, 14 * SIZE(AO8) LFD a8, 15 * SIZE(AO8) addi AO5, AO5, 16 * SIZE addi AO6, AO6, 16 * SIZE addi AO7, AO7, 16 * SIZE addi AO8, AO8, 16 * SIZE PREFETCH_A5 PREFETCH_A6 PREFETCH_A7 PREFETCH_A8 FMADD y09, alpha8, a1, y09 FMADD y10, alpha8, a2, y10 FMADD y11, alpha8, a3, y11 FMADD y12, alpha8, a4, y12 LFD a1, 0 * SIZE(AO1) LFD a2, 1 * SIZE(AO1) LFD a3, 2 * SIZE(AO1) LFD a4, 3 * SIZE(AO1) FMADD y13, alpha8, a5, y13 FMADD y14, alpha8, a6, y14 FMADD y15, alpha8, a7, y15 FMADD y16, alpha8, a8, y16 LFD a5, 4 * SIZE(AO1) LFD a6, 5 * SIZE(AO1) LFD a7, 6 * SIZE(AO1) LFD a8, 7 * SIZE(AO1) STFD y01, 16 * SIZE(Y1) STFD y02, 17 * SIZE(Y1) STFD y03, 18 * SIZE(Y1) STFD y04, 19 * SIZE(Y1) LFD y01, 32 * SIZE(Y1) LFD y02, 33 * SIZE(Y1) LFD y03, 34 * SIZE(Y1) LFD y04, 35 * SIZE(Y1) PREFETCH_Y addi Y1, Y1, 16 * SIZE bdnz LL(12) .align 4LL(13): STFD y05, 4 * SIZE(Y1) STFD y06, 5 * SIZE(Y1) STFD y07, 6 * SIZE(Y1) STFD y08, 7 * SIZE(Y1) STFD y09, 8 * SIZE(Y1) STFD y10, 9 * SIZE(Y1) STFD y11, 10 * SIZE(Y1) STFD y12, 11 * SIZE(Y1) STFD y13, 12 * SIZE(Y1) STFD y14, 13 * SIZE(Y1) STFD y15, 14 * SIZE(Y1) STFD y16, 15 * SIZE(Y1) addi Y1, Y1, 16 * SIZE .align 4LL(15): andi. r0, M, 15 ble LL(19) andi. r0, M, 8 ble LL(16) LFD y01, 0 * SIZE(Y1) LFD y02, 1 * SIZE(Y1) LFD y03, 2 * SIZE(Y1) LFD y04, 3 * SIZE(Y1) LFD a1, 0 * SIZE(AO1) LFD a2, 1 * SIZE(AO1) LFD a3, 2 * SIZE(AO1) LFD a4, 3 * SIZE(AO1) LFD y05, 4 * SIZE(Y1) LFD y06, 5 * SIZE(Y1) LFD y07, 6 * SIZE(Y1) LFD y08, 7 * SIZE(Y1) LFD a5, 4 * SIZE(AO1) LFD a6, 5 * SIZE(AO1) LFD a7, 6 * SIZE(AO1) LFD a8, 7 * SIZE(AO1) FMADD y01, alpha1, a1, y01 LFD a1, 0 * SIZE(AO2) FMADD y02, alpha1, a2, y02 LFD a2, 1 * SIZE(AO2) FMADD y03, alpha1, a3, y03 LFD a3, 2 * SIZE(AO2) FMADD y04, alpha1, a4, y04 LFD a4, 3 * SIZE(AO2) FMADD y05, alpha1, a5, y05 LFD a5, 4 * SIZE(AO2) FMADD y06, alpha1, a6, y06 LFD a6, 5 * SIZE(AO2) FMADD y07, alpha1, a7, y07 LFD a7, 6 * SIZE(AO2) FMADD y08, alpha1, a8, y08 LFD a8, 7 * SIZE(AO2) FMADD y01, alpha2, a1, y01 LFD a1, 0 * SIZE(AO3) FMADD y02, alpha2, a2, y02 LFD a2, 1 * SIZE(AO3) FMADD y03, alpha2, a3, y03 LFD a3, 2 * SIZE(AO3) FMADD y04, alpha2, a4, y04 LFD a4, 3 * SIZE(AO3) FMADD y05, alpha2, a5, y05 LFD a5, 4 * SIZE(AO3) FMADD y06, alpha2, a6, y06 LFD a6, 5 * SIZE(AO3) FMADD y07, alpha2, a7, y07 LFD a7, 6 * SIZE(AO3) FMADD y08, alpha2, a8, y08 LFD a8, 7 * SIZE(AO3) FMADD y01, alpha3, a1, y01 LFD a1, 0 * SIZE(AO4) FMADD y02, alpha3, a2, y02 LFD a2, 1 * SIZE(AO4)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -