atl_sjik48x48x48tn48x48x0_a1_b0.asm

来自「基于 BLAS 和 CLAPACK 的。用过的人知道是干啥的」· 汇编 代码 · 共 1,351 行 · 第 1/2 页

ASM
1,351
字号
	; ---------------------------------------------------------------------
	; Tail of the unrolled kernel body: row groups 5..8 of the current
	; column, followed by the column-loop back edge and the epilogue.
	; (The function entry, the loopj_end_ label, the OPERATION macro and the
	; ELMn / DOTPn constants are defined earlier in the file.)
	;
	; One instruction per line is required here: a ';' comment runs to the
	; end of the line, so instructions must never share a line with a
	; preceding comment.
	;
	; Register roles as far as this fragment shows them:
	;   eax  base for the six operands DOTP1..DOTP6; advanced by edx per
	;        group and reloaded from [esp+4] per column
	;   ebx  current column of B (see "next column of B" below)
	;   ecx  current column of C (see "next column of C" below)
	;   edx  per-group increment applied to eax
	;   edi  per-column step subtracted from ebx
	;   [esp+8]  column counter, [esp+12] C column stride (ldc*4),
	;   [esp+16] A-side prefetch offset, [esp+20] scratch slot "t1"
	;
	; x87 stack inside a group: st0 = current B element,
	; st1..st6 = accumulators 6..1. Comment tags read <k-step>+<group>.
	;
	; NOTE(review): the lone "rep" prefixes, "nop", "fnop", "mov edx,edx"
	; and "mov eax,eax" have no architectural effect on the results here;
	; presumably they are decode/alignment padding -- confirm before
	; removing any of them.
	; ---------------------------------------------------------------------

	; Optional prefetch-for-write around ecx + [esp+12] (next column of C).
	%ifdef PREC_DST4
		mov [esp+20],ecx		;save ecx in the scratch slot
		add ecx,[esp+12]
		prefetchw [ecx-2*64]
		prefetchw [ecx-1*64]
		prefetchw [ecx+0*64]
		prefetchw [ecx+1*64]
		nop
		prefetchw [ecx+2*64-1]
		mov ecx,[esp+20]		;restore ecx
	%endif
	; Optional read prefetch of five 64-byte lines around ebx+48*4.
	%ifdef PREB_DST4
		prefetch [ebx+48*4-2*64]
		fnop
		mov edx,edx
		prefetch [ebx+48*4-1*64]
		nop
		prefetch [ebx+48*4+0*64]
		nop
		prefetch [ebx+48*4+1*64]
		nop
		prefetch [ebx+48*4+2*64]
		nop
	%endif

	; Store the six finished accumulators of the previous group (4).
	; On entry st0 = acc1 and st5 = acc6; the two fxch put acc2 and acc3
	; on top so the stores land in ascending order.
	fstp dword [ecx+ELM19]
	fxch st3
	fstp dword [ecx+ELM20]
	fxch st1
	fstp dword [ecx+ELM21]
	fstp dword [ecx+ELM22]
	fstp dword [ecx+ELM23]
	fstp dword [ecx+ELM24]

	; ---- group 5 ----------------------------------------------------------
	add eax,edx				;advance operand base to the next group
	; k-step 1: form the six initial products with the first B element and
	; preload the second one. Leaves st0 = [ebx+ELM2], st1..st6 = products
	; for DOTP6..DOTP1.
	fld dword [ebx+ELM1]			;01+5
	fld dword [eax+DOTP2]
	fmul st0,st1
	fld dword [eax+DOTP3]
	fmul st0,st2
	fld dword [eax+DOTP1]
	rep
	fmul st0,st3
	fxch st0,st3
	fld dword [eax+DOTP5]
	rep
	fmul st0,st1
	rep
	fld dword [eax+DOTP6]
	mov edx,edx
	fmul st0,st2
	fld dword [ebx+ELM2]
	fld dword [eax+DOTP4]
	rep
	fmulp st4,st0
	add eax,byte 30*4			;eax += 120 bytes
	mov edx,edx
	; k-steps 2..47 via the OPERATION macro (defined outside this fragment;
	; presumably accumulates step %1 and preloads B element %2).
	OPERATION 2,3				;02+5
	OPERATION 3,4				;03+5
	OPERATION 4,5				;04+5
	OPERATION 5,6				;05+5
	OPERATION 6,7				;06+5
	OPERATION 7,8				;07+5
	OPERATION 8,9				;08+5
	OPERATION 9,10				;09+5
	OPERATION 10,11				;10+5
	OPERATION 11,12				;11+5
	OPERATION 12,13				;12+5
	OPERATION 13,14				;13+5
	OPERATION 14,15				;14+5
	OPERATION 15,16				;15+5
	OPERATION 16,17				;16+5
	OPERATION 17,18				;17+5
	OPERATION 18,19				;18+5
	OPERATION 19,20				;19+5
	OPERATION 20,21				;20+5
	OPERATION 21,22				;21+5
	OPERATION 22,23				;22+5
	OPERATION 23,24				;23+5
	OPERATION 24,25				;24+5
	OPERATION 25,26				;25+5
	OPERATION 26,27				;26+5
	OPERATION 27,28				;27+5
	OPERATION 28,29				;28+5
	OPERATION 29,30				;29+5
	OPERATION 30,31				;30+5
	OPERATION 31,32				;31+5
	OPERATION 32,33				;32+5
	OPERATION 33,34				;33+5
	OPERATION 34,35				;34+5
	OPERATION 35,36				;35+5
	OPERATION 36,37				;36+5
	OPERATION 37,38				;37+5
	OPERATION 38,39				;38+5
	OPERATION 39,40				;39+5
	OPERATION 40,41				;40+5
	OPERATION 41,42				;41+5
	OPERATION 42,43				;42+5
	OPERATION 43,44				;43+5
	OPERATION 44,45				;44+5
	OPERATION 45,46				;45+5
	OPERATION 46,47				;46+5
	OPERATION 47,48				;47+5
	; k-step 48: last multiply-add for each accumulator. The sixth product
	; is formed in place on the B element, which is thereby consumed.
	fld dword [eax+DOTP1+ELM48]		;48+5
	fmul st0,st1
	faddp st7
	fld dword [eax+DOTP2+ELM48]
	fmul st0,st1
	faddp st6
	fld dword [eax+DOTP3+ELM48]
	fmul st0,st1
	faddp st5
	fld dword [eax+DOTP4+ELM48]
	fmul st0,st1
	faddp st4
	fld dword [eax+DOTP5+ELM48]
	fmul st0,st1
	faddp st3
	rep
	fmul dword [eax+DOTP6+ELM48]
	faddp st1
	fxch st5				;bring acc1 to st0 for the stores
	%ifdef PREC_DST3
		mov [esp+20],ecx		;save ecx in the scratch slot
		add ecx,[esp+12]
		prefetchw [ecx-2*64]
		prefetchw [ecx-1*64]
		prefetchw [ecx+0*64]
		prefetchw [ecx+1*64]
		nop
		prefetchw [ecx+2*64-1]
		mov ecx,[esp+20]		;restore ecx
	%endif
	%ifdef PREB_DST3
		prefetch [ebx+48*4-2*64]
		fnop
		mov edx,edx
		prefetch [ebx+48*4-1*64]
		nop
		prefetch [ebx+48*4+0*64]
		nop
		prefetch [ebx+48*4+1*64]
		nop
		prefetch [ebx+48*4+2*64]
		nop
	%endif
	; Store the six results of group 5.
	fstp dword [ecx+ELM25]
	fxch st3
	fstp dword [ecx+ELM26]
	fxch st1
	fstp dword [ecx+ELM27]
	fstp dword [ecx+ELM28]
	fstp dword [ecx+ELM29]
	fstp dword [ecx+ELM30]

	; ---- group 6 ----------------------------------------------------------
	add eax,edx				;advance operand base to the next group
	fld dword [ebx+ELM1]			;01+6
	fld dword [eax+DOTP2]
	fmul st0,st1
	fld dword [eax+DOTP3]
	fmul st0,st2
	fld dword [eax+DOTP1]
	rep
	fmul st0,st3
	fxch st0,st3
	fld dword [eax+DOTP5]
	rep
	fmul st0,st1
	rep
	fld dword [eax+DOTP6]
	mov edx,edx
	fmul st0,st2
	fld dword [ebx+ELM2]
	fld dword [eax+DOTP4]
	rep
	fmulp st4,st0
	add eax,byte 30*4			;eax += 120 bytes
	mov edx,edx
	OPERATION 2,3				;02+6
	OPERATION 3,4				;03+6
	OPERATION 4,5				;04+6
	OPERATION 5,6				;05+6
	OPERATION 6,7				;06+6
	OPERATION 7,8				;07+6
	OPERATION 8,9				;08+6
	OPERATION 9,10				;09+6
	OPERATION 10,11				;10+6
	OPERATION 11,12				;11+6
	OPERATION 12,13				;12+6
	OPERATION 13,14				;13+6
	OPERATION 14,15				;14+6
	OPERATION 15,16				;15+6
	OPERATION 16,17				;16+6
	OPERATION 17,18				;17+6
	OPERATION 18,19				;18+6
	OPERATION 19,20				;19+6
	OPERATION 20,21				;20+6
	OPERATION 21,22				;21+6
	OPERATION 22,23				;22+6
	OPERATION 23,24				;23+6
	OPERATION 24,25				;24+6
	OPERATION 25,26				;25+6
	OPERATION 26,27				;26+6
	OPERATION 27,28				;27+6
	OPERATION 28,29				;28+6
	OPERATION 29,30				;29+6
	OPERATION 30,31				;30+6
	OPERATION 31,32				;31+6
	OPERATION 32,33				;32+6
	OPERATION 33,34				;33+6
	OPERATION 34,35				;34+6
	OPERATION 35,36				;35+6
	OPERATION 36,37				;36+6
	OPERATION 37,38				;37+6
	OPERATION 38,39				;38+6
	OPERATION 39,40				;39+6
	OPERATION 40,41				;40+6
	OPERATION 41,42				;41+6
	OPERATION 42,43				;42+6
	OPERATION 43,44				;43+6
	OPERATION 44,45				;44+6
	OPERATION 45,46				;45+6
	OPERATION 46,47				;46+6
	OPERATION 47,48				;47+6
	fld dword [eax+DOTP1+ELM48]		;48+6
	fmul st0,st1
	faddp st7
	fld dword [eax+DOTP2+ELM48]
	fmul st0,st1
	faddp st6
	fld dword [eax+DOTP3+ELM48]
	fmul st0,st1
	faddp st5
	fld dword [eax+DOTP4+ELM48]
	fmul st0,st1
	faddp st4
	fld dword [eax+DOTP5+ELM48]
	fmul st0,st1
	faddp st3
	rep
	fmul dword [eax+DOTP6+ELM48]
	faddp st1
	fxch st5				;bring acc1 to st0 for the stores
	%ifdef PREC_DST2
		mov [esp+20],ecx		;save ecx in the scratch slot
		add ecx,[esp+12]
		prefetchw [ecx-2*64]
		prefetchw [ecx-1*64]
		prefetchw [ecx+0*64]
		prefetchw [ecx+1*64]
		nop
		prefetchw [ecx+2*64-1]
		mov ecx,[esp+20]		;restore ecx
	%endif
	%ifdef PREB_DST2
		prefetch [ebx+48*4-2*64]
		fnop
		mov edx,edx
		prefetch [ebx+48*4-1*64]
		nop
		prefetch [ebx+48*4+0*64]
		nop
		prefetch [ebx+48*4+1*64]
		nop
		prefetch [ebx+48*4+2*64]
		nop
	%endif
	; Store the six results of group 6 (this group alone carries an extra
	; "rep" prefix before its first store).
	rep
	fstp dword [ecx+ELM31]
	fxch st3
	fstp dword [ecx+ELM32]
	fxch st1
	fstp dword [ecx+ELM33]
	fstp dword [ecx+ELM34]
	fstp dword [ecx+ELM35]
	fstp dword [ecx+ELM36]

	; ---- group 7 ----------------------------------------------------------
	add eax,edx				;advance operand base to the next group
	fld dword [ebx+ELM1]			;01+7
	fld dword [eax+DOTP2]
	fmul st0,st1
	fld dword [eax+DOTP3]
	fmul st0,st2
	fld dword [eax+DOTP1]
	rep
	fmul st0,st3
	fxch st0,st3
	fld dword [eax+DOTP5]
	rep
	fmul st0,st1
	rep
	fld dword [eax+DOTP6]
	mov edx,edx
	fmul st0,st2
	fld dword [ebx+ELM2]
	fld dword [eax+DOTP4]
	rep
	fmulp st4,st0
	add eax,byte 30*4			;eax += 120 bytes
	mov edx,edx
	OPERATION 2,3				;02+7
	OPERATION 3,4				;03+7
	OPERATION 4,5				;04+7
	OPERATION 5,6				;05+7
	OPERATION 6,7				;06+7
	OPERATION 7,8				;07+7
	OPERATION 8,9				;08+7
	OPERATION 9,10				;09+7
	OPERATION 10,11				;10+7
	OPERATION 11,12				;11+7
	OPERATION 12,13				;12+7
	OPERATION 13,14				;13+7
	OPERATION 14,15				;14+7
	OPERATION 15,16				;15+7
	OPERATION 16,17				;16+7
	OPERATION 17,18				;17+7
	OPERATION 18,19				;18+7
	OPERATION 19,20				;19+7
	OPERATION 20,21				;20+7
	OPERATION 21,22				;21+7
	OPERATION 22,23				;22+7
	OPERATION 23,24				;23+7
	OPERATION 24,25				;24+7
	OPERATION 25,26				;25+7
	OPERATION 26,27				;26+7
	OPERATION 27,28				;27+7
	OPERATION 28,29				;28+7
	OPERATION 29,30				;29+7
	OPERATION 30,31				;30+7
	OPERATION 31,32				;31+7
	OPERATION 32,33				;32+7
	OPERATION 33,34				;33+7
	OPERATION 34,35				;34+7
	OPERATION 35,36				;35+7
	OPERATION 36,37				;36+7
	OPERATION 37,38				;37+7
	OPERATION 38,39				;38+7
	OPERATION 39,40				;39+7
	OPERATION 40,41				;40+7
	OPERATION 41,42				;41+7
	OPERATION 42,43				;42+7
	OPERATION 43,44				;43+7
	OPERATION 44,45				;44+7
	OPERATION 45,46				;45+7
	OPERATION 46,47				;46+7
	OPERATION 47,48				;47+7
	fld dword [eax+DOTP1+ELM48]		;48+7
	fmul st0,st1
	faddp st7
	fld dword [eax+DOTP2+ELM48]
	fmul st0,st1
	faddp st6
	fld dword [eax+DOTP3+ELM48]
	fmul st0,st1
	faddp st5
	fld dword [eax+DOTP4+ELM48]
	fmul st0,st1
	faddp st4
	fld dword [eax+DOTP5+ELM48]
	fmul st0,st1
	faddp st3
	rep
	fmul dword [eax+DOTP6+ELM48]
	faddp st1
	fxch st5				;bring acc1 to st0 for the stores
	%ifdef PREC_DST1
		mov [esp+20],ecx		;save ecx in the scratch slot
		add ecx,[esp+12]
		prefetchw [ecx-2*64]
		prefetchw [ecx-1*64]
		prefetchw [ecx+0*64]
		prefetchw [ecx+1*64]
		nop
		prefetchw [ecx+2*64-1]
		mov ecx,[esp+20]		;restore ecx
	%endif
	%ifdef PREB_DST1
		prefetch [ebx+48*4-2*64]
		fnop
		mov edx,edx
		prefetch [ebx+48*4-1*64]
		nop
		prefetch [ebx+48*4+0*64]
		nop
		prefetch [ebx+48*4+1*64]
		nop
		prefetch [ebx+48*4+2*64]
		nop
	%endif
	; Store the six results of group 7.
	fstp dword [ecx+ELM37]
	fxch st3
	fstp dword [ecx+ELM38]
	fxch st1
	fstp dword [ecx+ELM39]
	fstp dword [ecx+ELM40]
	fstp dword [ecx+ELM41]
	fstp dword [ecx+ELM42]

	; ---- group 8 (last group of the column) -------------------------------
	add eax,edx				;advance operand base to the next group
	fld dword [ebx+ELM1]			;01+8
	fld dword [eax+DOTP2]
	fmul st0,st1
	fld dword [eax+DOTP3]
	fmul st0,st2
	fld dword [eax+DOTP1]
	rep
	fmul st0,st3
	fxch st0,st3
	fld dword [eax+DOTP5]
	rep
	fmul st0,st1
	rep
	fld dword [eax+DOTP6]
	mov edx,edx
	fmul st0,st2
	fld dword [ebx+ELM2]
	fld dword [eax+DOTP4]
	rep
	fmulp st4,st0
	add eax,byte 30*4			;eax += 120 bytes
	mov edx,edx
	OPERATION 2,3				;02+8
	OPERATION 3,4				;03+8
	OPERATION 4,5				;04+8
	OPERATION 5,6				;05+8
	OPERATION 6,7				;06+8
	OPERATION 7,8				;07+8
	OPERATION 8,9				;08+8
	OPERATION 9,10				;09+8
	OPERATION 10,11				;10+8
	OPERATION 11,12				;11+8
	OPERATION 12,13				;12+8
	OPERATION 13,14				;13+8
	OPERATION 14,15				;14+8
	OPERATION 15,16				;15+8
	OPERATION 16,17				;16+8
	OPERATION 17,18				;17+8
	OPERATION 18,19				;18+8
	OPERATION 19,20				;19+8
	OPERATION 20,21				;20+8
	OPERATION 21,22				;21+8
	OPERATION 22,23				;22+8
	OPERATION 23,24				;23+8
	OPERATION 24,25				;24+8
	OPERATION 25,26				;25+8
	OPERATION 26,27				;26+8
	OPERATION 27,28				;27+8
	OPERATION 28,29				;28+8
	OPERATION 29,30				;29+8
	OPERATION 30,31				;30+8
	OPERATION 31,32				;31+8
	OPERATION 32,33				;32+8
	OPERATION 33,34				;33+8
	OPERATION 34,35				;34+8
	OPERATION 35,36				;35+8
	OPERATION 36,37				;36+8
	OPERATION 37,38				;37+8
	OPERATION 38,39				;38+8
	OPERATION 39,40				;39+8
	OPERATION 40,41				;40+8
	OPERATION 41,42				;41+8
	OPERATION 42,43				;42+8
	OPERATION 43,44				;43+8
	OPERATION 44,45				;44+8
	OPERATION 45,46				;45+8
	OPERATION 46,47				;46+8
	OPERATION 47,48				;47+8
	fld dword [eax+DOTP1+ELM48]		;48+8
	fmul st0,st1
	faddp st7
	fld dword [eax+DOTP2+ELM48]
	fmul st0,st1
	faddp st6
	fld dword [eax+DOTP3+ELM48]
	fmul st0,st1
	faddp st5
	fld dword [eax+DOTP4+ELM48]
	fmul st0,st1
	faddp st4
	fld dword [eax+DOTP5+ELM48]
	fmul st0,st1
	faddp st3
	rep
	fmul dword [eax+DOTP6+ELM48]
	faddp st1
	fxch st5				;bring acc1 to st0 for the stores
	; Optional read prefetch around ebx + [esp+16]; edx is borrowed as the
	; address register and restored afterwards.
	%ifdef PREA_EN
		mov [esp+20],edx		;save edx in t1
		mov edx,[esp+16]		;&A+1->edx
		lea edx,[edx+ebx]
		prefetch [edx-2*64]
		nop
		prefetch [edx-1*64]
		prefetch [edx+0*64]
		nop
		prefetch [edx+1*64]
		prefetch [edx+2*64-8]
		mov edx,[esp+20]		;restore edx
		mov eax,eax
		fnop
	%endif
	; Store the six results of group 8; the x87 stack is empty afterwards.
	fstp dword [ecx+ELM43]
	fxch st3
	fstp dword [ecx+ELM44]
	fxch st1
	fstp dword [ecx+ELM45]
	fstp dword [ecx+ELM46]
	fstp dword [ecx+ELM47]
	fstp dword [ecx+ELM48]

	; ---- advance to the next column and loop ------------------------------
	sub ebx,edi				;next column of B
	mov eax,[esp+4]				;reset eax
	add ecx,[esp+12]			;next column of C (+ldc*4)
	dec dword [esp+8]			;dec counter
	jnz near loopj_end_			;more columns left: back to the loop label

	; ---- epilogue ---------------------------------------------------------
	femms					;fast exit from MMX/3DNow! state
	pop ebp
	add esp,byte 5*4			;remove local variables
	pop edi					;restore registers
	pop esi
	pop ebx
	leave					;mov esp,ebp / pop ebp
	ret

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?