nb_kernel030_ia64_single.s

来自「最著名最快的分子模拟软件」· S 代码 · 共 970 行 · 第 1/2 页

S
970
字号
/*
 * $Id: nb_kernel030_ia64_single.S,v 1.2 2005/01/25 12:11:51 lindahl Exp $
 *
 * Gromacs 4.0                         Copyright (c) 1991-2003
 * David van der Spoel, Erik Lindahl, University of Groningen.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org
 *
 * And Hey:
 * Gnomes, ROck Monsters And Chili Sauce
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

/*
 * The ia64-assembly Gromacs inner loops would not have been
 * possible without a lot of support, tutoring and optimization
 * suggestions from John Worley at Hewlett-Packard.
 */

/*
 * Layout note: every preprocessor directive below sits on its own line.
 * A directive is only recognised at the start of a line, and a `//`
 * comment runs to end of line, so fusing these lines together silently
 * disables the definitions that follow on the same line.
 */

/* Each thread locks a counter and grabs a couple of neighborlists.
 * Available sizes for this chunk: 1,2,4,8, or 16
 */
#define THREAD_CHUNK_SIZE       8
#define JJNR_PREFETCH_DISTANCE  128

//	ia64 General Register definitions:
#define zero    r0      /* permanent zero                   */
#define gp      r1      /* global data pointer              */
#define at0     r2      /* temp, target of addi             */
#define at1     r3      /* temp, target of addi             */
#define S0      r4      /* callee saves register            */
#define S1      r5      /* callee saves register            */
#define S2      r6      /* callee saves register            */
#define S3      r7      /* callee saves register            */
#define v0      r8      /* 1st fixed point return value/ptr */
#define v1      r9      /* 2nd fixed return value/ptr       */
#define v2      r10     /* 3rd fixed return value/ptr       */
#define v3      r11     /* 4th fixed return value/ptr       */
#define sp      r12     /* memory stack pointer             */
#define tp      r13     /* thread pointer                   */
#define t0      r14     /* caller saves register            */
#define t1      r15     /* caller saves register            */
#define t2      r16     /* caller saves register            */
#define t3      r17     /* caller saves register            */
#define t4      r18     /* caller saves register            */
#define t5      r19     /* caller saves register            */
#define t6      r20     /* caller saves register            */
#define t7      r21     /* caller saves register            */
#define t8      r22     /* caller saves register            */
#define t9      r23     /* caller saves register            */
#define t10     r24     /* caller saves register            */
#define t11     r25     /* caller saves register            */
#define t12     r26     /* caller saves register            */
#define t13     r27     /* caller saves register            */
#define t14     r28     /* caller saves register            */
#define t15     r29     /* caller saves register            */
#define t16     r30     /* caller saves register            */
#define t17     r31     /* caller saves register            */

//	ia64 Floating-point register definitions
#define fZero   f0      /* permanent floating point 0.0     */
#define fOne    f1      /* permanent floating point 1.0     */
#define fs0     f2      /* callee saves register            */
#define fs1     f3      /* callee saves register            */
#define fs2     f4      /* callee saves register            */
#define fs3     f5      /* callee saves register            */
#define ft0     f6      /* caller saves register            */
#define ft1     f7      /* caller saves register            */
#define fa0     f8      /* argument register 0              */
#define fa1     f9      /* argument register 1              */
#define fa2     f10     /* argument register 2              */
#define fa3     f11     /* argument register 3              */
#define fa4     f12     /* argument register 4              */
#define fa5     f13     /* argument register 5              */
#define fa6     f14     /* argument register 6              */
#define fa7     f15     /* argument register 7              */
/* fv0-fv7 name the same physical registers as fa0-fa7 (f8-f15). */
#define fv0     f8      /* return value register 0          */
#define fv1     f9      /* return value register 1          */
#define fv2     f10     /* return value register 2          */
#define fv3     f11     /* return value register 3          */
#define fv4     f12     /* return value register 4          */
#define fv5     f13     /* return value register 5          */
#define fv6     f14     /* return value register 6          */
#define fv7     f15     /* return value register 7          */
#define fs4     f16     /* callee saves register            */
#define fs5     f17     /* callee saves register            */
#define fs6     f18     /* callee saves register            */
#define fs7     f19     /* callee saves register            */
#define fs8     f20     /* callee saves register            */
#define fs9     f21     /* callee saves register            */
#define fs10    f22     /* callee saves register            */
#define fs11    f23     /* callee saves register            */
#define fs12    f24     /* callee saves register            */
#define fs13    f25     /* callee saves register            */
#define fs14    f26     /* callee saves register            */
#define fs15    f27     /* callee saves register            */
#define fs16    f28     /* callee saves register            */
#define fs17    f29     /* callee saves register            */
#define fs18    f30     /* callee saves register            */
#define fs19    f31     /* callee saves register            */

// ia64 predicate register definitions
#define pone    p0      /* permanent one predicate          */
#define pTrue   p0      /* permanent one predicate          */
#define ps0     p1      /* callee saves predicate           */
#define ps1     p2      /* callee saves predicate           */
#define ps2     p3      /* callee saves predicate           */
#define ps3     p4      /* callee saves predicate           */
#define ps4     p5      /* callee saves predicate           */
#define pt0     p6      /* caller saves predicate           */
#define pt1     p7      /* caller saves predicate           */
#define pt2     p8      /* caller saves predicate           */
#define pt3     p9      /* caller saves predicate           */
#define pt4     p10     /* caller saves predicate           */
#define pt5     p11     /* caller saves predicate           */
#define pt6     p12     /* caller saves predicate           */
#define pt7     p13     /* caller saves predicate           */
#define pt8     p14     /* caller saves predicate           */
#define pt9     p15     /* caller saves predicate           */

// ia64 branch register definitions
#define rb      b0      /* return link                      */
#define bs0     b1      /* callee saves branch register     */
#define bs1     b2      /* callee saves branch register     */
#define bs2     b3      /* callee saves branch register     */
#define bs3     b4      /* callee saves branch register     */
#define bs4     b5      /* callee saves branch register     */
#define bt0     b6      /* caller saves branch register     */
#define bt1     b7      /* caller saves branch register     */

	.text

/*
 * Symbolic names used by the kernel.  Each maps onto one of the register
 * aliases above, onto a stacked register (inN / locN), or onto an element
 * of a rotating-register array declared by the .rotr/.rotf directives.
 *
 * Many names deliberately share one physical register, e.g. xPFS and
 * LCSave are both at0; Tmp1, NN0, nriCount and spillPtr2 are all t0;
 * FIY, ICharge and IQ are all fs5; II3/In_GID are both in7.  Names that
 * share a register cannot hold independent live values at the same time.
 */
#define CHARGE      t10
#define FACTION     t9
#define FActII      loc8
#define FActIX      fs1
#define FActIY      fs2
#define FActIZ      fs3
#define FIX         fs4
#define FIY         fs5
#define FIZ         fs6
#define FSHIFT      t6
#define FShiftIS    loc9
#define FShiftX     fa5
#define FShiftY     fa6
#define FShiftZ     fa7
#define ICharge     fs5
#define InnerCnt    t17
#define II          t13
#define II3         in7
#define IQ          fs5
#define IS          t12
#define IS3         in6
#define IX          fa2
#define IY          fa3
#define IZ          fa4

/* Register-passed arguments (stacked input registers in0-in7). */
#define In_FSHIFT   in6
#define In_GID      in7
#define In_IINR     in1
#define In_JINDEX   in2
#define In_JJNR     in3
#define In_NRI      in0
#define In_SHIFT    in4
#define In_SHIFTVEC in5

/* Stacked local registers holding copies of arguments and state. */
#define NRI         loc12
#define IINR        loc13
#define JINDEX      loc14
#define JJNR        loc15
#define SHIFT       loc16
#define GID         loc17
#define COUNT       loc18

#define JX          DX[0]
#define JY          DY[0]
#define JZ          DZ[0]
#define LCSave      at0
#define NJ0         t14
#define NJ1         t15
#define POSITION    t8
#define PRSave      at1
#define PosX        f88
#define PosY        f89
#define PosZ        f90
#define SHIFTVEC    t5
#define VC          t11
#define VCPtr       ggid
#define VNBTotal    fs0
#define Vvdw6       C6[2]
#define Vvdw12      C12[2]
/* NOTE(review): no rotating array named RInv6 appears in the .rotf list
 * visible in this file section; harmless only if RInv12 is never
 * expanded -- confirm against the rest of the file. */
#define RInv12      RInv6[1]
#define argPtr      loc23
#define chargePtr   v2
#define Tmp1        t0
#define Tmp2        t17
#define Tmp3        loc11
#define Tmp4        t2
#define Tmp5        t3
#define fHALF       ft0
#define f3_8        ft1
#define f5_16       fa0
#define fSIX        fa1
#define fillP0      v0
#define fillP1      v1
#define NN0         t0
#define NN1         loc11
#define ggid        loc10
#define gidPtr      t7
#define iinrPtr     t1
#define jindexPtr   t2
#define jjnrPtr     t3
#define jnr         t16
#define jnr3        v0
#define nriCount    t0
#define pCont       pt0
#define pDone       pt1
#define pJJNR       pt2
#define pMore       pt3
#define pLast       pt4
#define posPtr      v3
#define Facel       fa4
#define Tabscale    fa1
#define shX         fa2
#define shY         fa3
#define shZ         fa4
#define shiftPtr    t4
#define shiftVPtr   v1
#define spillPtr    v0      /* used to be t7 */
#define spillPtr2   t0
#define xPFS        at0
#define TYPE        loc19
#define NTYPE       loc20
#define typePtr     loc21
#define NBFP        loc22
#define NTI         loc24
#define VNBPtr      loc25
#define VFTab       loc26
#define Nouter      loc27
#define Ninner      loc28
#define OuterIter   loc29
#define InnerIter   loc30
#define VNB         loc31

/* NOTE(review): the .rotr list visible in this file section declares
 * nnn[2], not nA, and the .rotf list declares n0 with only two elements
 * (valid indices 0 and 1).  nnnA, eps1 and eps2 therefore look stale;
 * confirm they are never expanded before relying on them. */
#define nnnA        nA[0]
#define R           RT[0]
#define eps0        n0[1]
#define eps1        n0[2]
#define eps2        n0[3]

/* Register stack frame sizes; the same four values are given to .regstk
 * and to the alloc instruction at procedure entry. */
#define _NINPUTS    8
#define _NLOCALS    32
#define _NOUTPUT    0
#define _NROTATE    16
.regstk	8, 32, 0, 16	.rotr	FActPtr[7], TypeJ[4], nnn[2]	.rotf	DX[7], DY[7], DZ[7], FActX[3], FActY[3], FActZ[3], RSqr[3], RInv[6], RInvT[2], RInvU[2], RInvErr[2], FScalar[2], C6[5], C12[5], Disp_Y[3], Disp_F[3], Disp_G[3], Disp_H[2], Rep_Y[3], Rep_F[3], Rep_G[3], Rep_H[2], eps[2], RT[2], n0[2]	.rotp	pPipe[7]#define	PIPE_DEPTH	7#define	EXP(n)					(0xffff + (n))#define	POS_STK_OFFSET			0x10#define	FACTION_STK_OFFSET		0x18#define	CHARGE_STK_OFFSET		0x20#define	FACEL_STK_OFFSET		0x28#define	KRF_STK_OFFSET			0x30#define	CRF_STK_OFFSET			0x38#define	VC_STK_OFFSET			0x40#define	TYPE_STK_OFFSET			0x48#define	NTYPE_STK_OFFSET		0x50#define	NBFP_STK_OFFSET			0x58#define	VNB_STK_OFFSET			0x60#define	TABSCALE_STK_OFFSET		0x68#define	VFTAB_STK_OFFSET		0x70#define	INVSQRTA_STK_OFFSET		0x78#define	DVDA_STK_OFFSET			0x80#define	GBTABSCALE_STK_OFFSET		0x88#define	GBTAB_STK_OFFSET		0x90#define	NTHREADS_STK_OFFSET		0x98#define	COUNT_STK_OFFSET		0xA0#define MTX_STK_OFFSET			0xA8#define OUTERITER_STK_OFFSET		0xB0#define INNERITER_STK_OFFSET		0xB8#define WORK_STK_OFFSET     		0xC0// The mutex call parameter isnt used at all in assembly...	
.global nb_kernel030_ia64_single	.proc	nb_kernel030_ia64_single	.align	32nb_kernel030_ia64_single://	INIT 1	{	.mmi		alloc			xPFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE		mov			spillPtr = sp		mov			Tmp1 = EXP(-1)	}	{	.mfi		nop			0x0		nop			0x0		add			argPtr = TABSCALE_STK_OFFSET, sp	} ;;//	INIT 2	{	.mlx		ld8			loc28 = [argPtr], COUNT_STK_OFFSET - TABSCALE_STK_OFFSET		movl			Tmp3 = 0x3ec00000	}	{	.mii		stf.spill		[spillPtr] = fs0, -16		nop			0x0		add			sp = -6 * 16, sp	} ;;//	INIT 3		{	.mlx		stf.spill		[spillPtr] = fs1, -16		movl			Tmp4 = 0x3ea00000	} 	{	.mmi										setf.exp		fHALF = Tmp1		ld8			COUNT = [argPtr], POS_STK_OFFSET - COUNT_STK_OFFSET		mov			PRSave	= pr	} ;;//	INIT 4		{	.mfi		ld8			POSITION = [argPtr], FACTION_STK_OFFSET - POS_STK_OFFSET		nop			0x0		nop			0x0	}	{	.mmi											nop			0x0		stf.spill		[spillPtr] = fs2, -16		mov			pr.rot	= 0x0	} ;;//  INIT 5		{	.mmi										ld8			FACTION = [argPtr], NTYPE_STK_OFFSET - FACTION_STK_OFFSET		setf.s			f3_8      = Tmp3		mov			SHIFTVEC   = In_SHIFTVEC	}   	{	.mmi		stf.spill		[spillPtr] = fs3, -16		ld4			NRI = [In_NRI]		mov			Nouter = 0	} ;;//  INIT 6		{	.mmi		ld8			NTYPE = [argPtr], TYPE_STK_OFFSET - NTYPE_STK_OFFSET		setf.s			f5_16 = Tmp4		mov			FSHIFT   = In_FSHIFT	} 	{	.mfi		stf.spill		[spillPtr] = fs4, -16		nop			0x0		mov			GID = In_GID	} ;;//  INIT 7		{	.mmf		ld8			TYPE = [argPtr], NBFP_STK_OFFSET - TYPE_STK_OFFSET		mov			SHIFT = In_SHIFT		fnorm			Facel = Facel	}	{	.mmi		stf.spill		[spillPtr] = fs5, -16		ldfs			Tabscale = [loc28]		nop			0x0	} ;;//  INIT 8		{	.mmf		ld8			NBFP = [argPtr], VFTAB_STK_OFFSET - NBFP_STK_OFFSET		mov			JJNR = In_JJNR		fnorm			f3_8 = f3_8	}		{	.mii		stf.spill		[spillPtr] = fs6, -16		mov			IINR = In_IINR		mov			JINDEX = In_JINDEX	} ;;//  INIT 9			{	.mmf		ld8			VFTab = [argPtr], VNB_STK_OFFSET - VFTAB_STK_OFFSET		stf.spill		[spillPtr] = fs7, -16		fnorm			f5_16 = f5_16	} ;; //  INIT 10	{	.mfi 		ld8			VNB = [argPtr], OUTERITER_STK_OFFSET - VNB_STK_OFFSET		fnorm			fHALF = fHALF		
mov			LCSave = ar.lc	} ;;//  INIT 11	{	.mmi		ld8			OuterIter = [argPtr], INNERITER_STK_OFFSET - OUTERITER_STK_OFFSET		ld4			NTYPE = [NTYPE]                mov                     Ninner = 0	} ;;//  INIT 12	{	.mfi		ld8			InnerIter = [argPtr], WORK_STK_OFFSET - INNERITER_STK_OFFSET		nop			0x0		nop			0x0	} ;;//  20 bundles used for init - still aligned.	threadLoop://  THREAD PROLOGUE 1		{	.mfi				fetchadd4.rel	NN0 = [COUNT], THREAD_CHUNK_SIZE		nop				0x0		nop				0x0	}	{	.mfi		setf.sig		f33 = NTYPE		nop				0x0		nop				0x0	} ;;    //  THREAD PROLOGUE 2 - at least 12 cycle latency hole before this bundle (fetchadd4)	{	.mmi				cmp.lt			pCont, pDone = NN0, NRI		shladd			gidPtr = NN0, 2, GID		adds			NN1 = THREAD_CHUNK_SIZE, NN0	}	{	.mmi		shladd			jindexPtr = NN0, 2, JINDEX		shladd   		shiftPtr  = NN0, 2, SHIFT		shladd			iinrPtr   = NN0, 2, IINR	} ;; //  THREAD PROLOGUE 3 		{ .mmi					(pCont) ld4			II = [iinrPtr], 4	(pCont) ld4			IS = [shiftPtr], 4		cmp.ge			pLast, pMore = NN1, NRI	}	{ .mib	(pCont) ld4			NJ0 = [jindexPtr], 4	(pCont) adds		Tmp2 = 1, NN0	(pDone) br.cond.spnt.few finish	} ;; 		//  THREAD PROLOGUE 4		{ .mmi						ld4				ggid = [gidPtr], 4		shladd			II3 = II, 1, II		shladd			IS3 = IS, 1, IS	}	{ .mmi		ld4				NJ1 = [jindexPtr], 4		nop				0x0		shladd			jjnrPtr = NJ0, 2, JJNR	} ;;//  THREAD PROLOGUE 5		{ .mmi		cmp.lt			pCont, pDone = Tmp2, NRI								shladd			FShiftIS  = IS3, 2, FSHIFT		shladd			typePtr = II, 2, TYPE	}		{ .mmi

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?