📄 gemm_kernel_1x4.s
字号:
jne .L14#if defined(TRMMKERNEL) && !defined(LEFT) addl $4, KK#endif leal (, LDC, 4), %eax addl %eax, C movl B, B_ORIG decl J jne .L11 ALIGN_4.L20: movl N, %eax andl $2, %eax je .L30 ALIGN_3.L21:#if defined(TRMMKERNEL) && defined(LEFT) movl OFFSET, %eax movl %eax, KK#endif movl STACK_A, A movl C, %edi#if !defined(TRMMKERNEL) || \ (defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \ (defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA)) movl B_ORIG, B#else movl KK, %eax leal (, %eax, SIZE), %eax leal (B_ORIG, %eax, 2), B#endif#ifndef TRMMKERNEL movl K, %eax#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) movl K, %eax subl KK, %eax movl %eax, KKK #else movl KK, %eax#ifdef LEFT addl $1, %eax#else addl $2, %eax#endif movl %eax, KKK#endif sarl $4, %eax jle .L23 ALIGN_4.L22: movl -16 * SIZE(B), %esi movl -8 * SIZE(B), %esi movl 0 * SIZE(B), %esi movl 8 * SIZE(B), %esi subl $-32 * SIZE, B decl %eax jne .L22 ALIGN_3.L23: movl M, %esi movl %esi, I ALIGN_3.L24:#if !defined(TRMMKERNEL) || \ (defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \ (defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA)) movl B_ORIG, B#else movl KK, %eax leal (, %eax, SIZE), %eax leal (A, %eax, 1), A leal (B_ORIG, %eax, 2), B#endif fldz fldz fldz fldz FLD -16 * SIZE(A) FLD -16 * SIZE(B) prefetchw 1 * SIZE(%edi) prefetchw 1 * SIZE(%edi, LDC)#ifndef TRMMKERNEL movl K, %eax#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) movl K, %eax subl KK, %eax movl %eax, KKK #else movl KK, %eax#ifdef LEFT addl $1, %eax#else addl $2, %eax#endif movl %eax, KKK#endif sarl $3, %eax je .L26 ALIGN_3.L25: fmul %st(1), %st faddp %st, %st(2) FMUL -15 * SIZE(B) faddp %st, %st(2) FLD -15 * SIZE(A) FLD -14 * SIZE(B) fmul %st(1), %st faddp %st, %st(4) FMUL -13 * SIZE(B) faddp %st, %st(4) FLD -14 * SIZE(A) FLD -12 * SIZE(B) fmul %st(1), %st faddp %st, %st(2) FMUL -11 * SIZE(B) faddp %st, %st(2) FLD -13 * SIZE(A) FLD -10 * SIZE(B) fmul %st(1), %st faddp %st, %st(4) FMUL -9 * SIZE(B) faddp %st, %st(4) FLD -12 * SIZE(A) FLD -8 * SIZE(B) fmul %st(1), %st faddp %st, %st(2) FMUL -7 * SIZE(B) faddp %st, %st(2) FLD -11 * SIZE(A) FLD -6 * SIZE(B) fmul %st(1), %st faddp %st, %st(4) FMUL -5 * SIZE(B) faddp %st, %st(4) FLD -10 * SIZE(A) FLD -4 * SIZE(B) fmul %st(1), %st faddp %st, %st(2) FMUL -3 * SIZE(B) faddp %st, %st(2) FLD -9 * SIZE(A) FLD -2 * SIZE(B) fmul %st(1), %st faddp %st, %st(4) FMUL -1 * SIZE(B) faddp %st, %st(4) FLD -8 * SIZE(A) FLD 0 * SIZE(B) addl $ 8 * SIZE, A subl $-16 * SIZE, B decl %eax jne .L25 ALIGN_4.L26:#ifndef TRMMKERNEL movl K, %eax#else movl KKK, %eax#endif and $7, %eax je .L29 ALIGN_4.L27: fmul %st(1), %st faddp %st, %st(2) FMUL -15 * SIZE(B) faddp %st, %st(2) FLD -15 * SIZE(A) FLD -14 * SIZE(B) addl $1 * SIZE,A addl $2 * SIZE,B decl %eax jne .L27 ALIGN_4.L29: ffreep %st ffreep %st faddp %st, %st(2) faddp %st, %st(2) fxch %st(2) fmul %st, %st(1) fmul %st, %st(2) fxch %st(2)#ifndef TRMMKERNEL FADD (%edi) FST (%edi) FADD (%edi,LDC) FST (%edi,LDC)#else FST (%edi) FST (%edi,LDC)#endif#if (defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \ (defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA)) movl K, %eax subl KKK, %eax leal (,%eax, SIZE), %eax leal (A, %eax, 1), A leal (B, %eax, 2), B#endif#if defined(TRMMKERNEL) && defined(LEFT) addl $1, KK#endif addl $1 * SIZE, %edi decl I jne .L24#if defined(TRMMKERNEL) && !defined(LEFT) addl $2, KK#endif leal (, LDC, 2), %eax addl %eax, C movl B, B_ORIG ALIGN_4.L30: movl N, %eax andl $1, %eax je .L999 ALIGN_3.L31:#if defined(TRMMKERNEL) && defined(LEFT) movl OFFSET, %eax movl %eax, KK#endif movl STACK_A, A movl C, %edi#if !defined(TRMMKERNEL) || \ (defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \ (defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA)) movl B_ORIG, B#else movl KK, %eax leal (, %eax, SIZE), %eax leal (B_ORIG, %eax, 1), B#endif#ifndef TRMMKERNEL movl K, %eax#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) movl K, %eax subl KK, %eax movl %eax, KKK #else movl KK, %eax#ifdef LEFT addl $1, %eax#else addl $1, %eax#endif movl %eax, KKK#endif sarl $5, %eax jle .L33 ALIGN_4.L32: movl -16 * SIZE(B), %esi movl -8 * SIZE(B), %esi movl 0 * SIZE(B), %esi movl 8 * SIZE(B), %esi subl $-32 * SIZE, B decl %eax jne .L32 ALIGN_3.L33: movl M, %esi movl %esi, I ALIGN_3.L34:#if !defined(TRMMKERNEL) || \ (defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \ (defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA)) movl B_ORIG, B#else movl KK, %eax leal (, %eax, SIZE), %eax leal (A, %eax, 1), A leal (B_ORIG, %eax, 1), B#endif fldz fldz fldz fldz prefetchw 1 * SIZE(%edi)#ifndef TRMMKERNEL movl K, %eax#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) movl K, %eax subl KK, %eax movl %eax, KKK #else movl KK, %eax#ifdef LEFT addl $1, %eax#else addl $1, %eax#endif movl %eax, KKK#endif sarl $3, %eax je .L36 ALIGN_3.L35: FLD -16 * SIZE(A) FMUL -16 * SIZE(B) faddp %st, %st(1) FLD -15 * SIZE(A) FMUL -15 * SIZE(B) faddp %st, %st(2) FLD -14 * SIZE(A) FMUL -14 * SIZE(B) faddp %st, %st(3) FLD -13 * SIZE(A) FMUL -13 * SIZE(B) faddp %st, %st(4) FLD -12 * SIZE(A) FMUL -12 * SIZE(B) faddp %st, %st(1) FLD -11 * SIZE(A) FMUL -11 * SIZE(B) faddp %st, %st(2) FLD -10 * SIZE(A) FMUL -10 * SIZE(B) faddp %st, %st(3) FLD -9 * SIZE(A) FMUL -9 * SIZE(B) faddp %st, %st(4) addl $8 * SIZE, A addl $8 * SIZE, B decl %eax jne .L35 ALIGN_4.L36:#ifndef TRMMKERNEL movl K, %eax#else movl KKK, %eax#endif and $7, %eax je .L39 ALIGN_4.L37: FLD -16 * SIZE(A) FMUL -16 * SIZE(B) faddp %st, %st(1) addl $1 * SIZE,A addl $1 * SIZE,B decl %eax jne .L37 ALIGN_4.L39: faddp %st, %st(2) faddp %st, %st(2) faddp %st, %st(1) fmul %st(1), %st#ifndef TRMMKERNEL FADD (%edi) FST (%edi)#else FST (%edi)#endif#if (defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \ (defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA)) movl K, %eax subl KKK, %eax leal (,%eax, SIZE), %eax leal (A, %eax, 1), A leal (B, %eax, 1), B#endif#if defined(TRMMKERNEL) && defined(LEFT) addl $1, KK#endif addl $1 * SIZE, %edi decl I jne .L34#if defined(TRMMKERNEL) && !defined(LEFT) addl $1, KK#endif addl LDC, C movl B, B_ORIG ALIGN_4.L999: ffreep %st popl %ebx popl %esi popl %edi popl %ebp addl $ARGS, %esp ret EPILOGUE
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -