nb_kernel310nf_ia64_single.s

来自「最著名最快的分子模拟软件」· S 代码 · 共 947 行 · 第 1/2 页

S
947
字号
/*
 * $Id: nb_kernel310nf_ia64_single.S,v 1.2 2005/01/25 12:11:51 lindahl Exp $
 *
 * Gromacs 4.0                         Copyright (c) 1991-2003
 * David van der Spoel, Erik Lindahl, University of Groningen.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org
 *
 * And Hey:
 * Gnomes, ROck Monsters And Chili Sauce
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

/*
 * The ia64-assembly Gromacs inner loops would not have been
 * possible without a lot of support and optimization suggestions
 * from John Worley at Hewlett-Packard.
 */

/*
 * Each thread locks a counter and grabs a couple of neighborlists.
 * Available sizes for this chunk: 1,2,4,8, or 16
 */
#define THREAD_CHUNK_SIZE       8
#define JJNR_PREFETCH_DISTANCE  128

//	ia64 General Register definitions:
#define	zero	r0	/* permanent zero					*/
#define	gp		r1	/* global data pointer				*/
#define	at0		r2	/* temp, target of addi				*/
#define	at1		r3	/* temp, target of addi				*/
#define	S0		r4	/* callee saves register			*/
#define	S1		r5	/* callee saves register			*/
#define	S2		r6	/* callee saves register			*/
#define	S3		r7	/* callee saves register			*/
#define	v0		r8	/* 1st fixed point return value/ptr	*/
#define	v1		r9	/* 2nd fixed return value/ptr		*/
#define	v2		r10	/* 3rd fixed return value/ptr		*/
#define	v3		r11	/* 4th fixed return value/ptr		*/
#define	sp		r12	/* memory stack pointer				*/
#define	tp		r13	/* thread pointer					*/
#define	t0		r14	/* caller saves register			*/
#define	t1		r15	/* caller saves register			*/
#define	t2		r16	/* caller saves register			*/
#define	t3		r17	/* caller saves register			*/
#define	t4		r18	/* caller saves register			*/
#define	t5		r19	/* caller saves register			*/
#define	t6		r20	/* caller saves register			*/
#define	t7		r21	/* caller saves register			*/
#define	t8		r22	/* caller saves register			*/
#define	t9		r23	/* caller saves register			*/
#define	t10		r24	/* caller saves register			*/
#define	t11		r25	/* caller saves register			*/
#define	t12		r26	/* caller saves register			*/
#define	t13		r27	/* caller saves register			*/
#define	t14		r28	/* caller saves register			*/
#define	t15		r29	/* caller saves register			*/
#define	t16		r30	/* caller saves register			*/
#define	t17		r31	/* caller saves register			*/

//	ia64 Floating-point register definitions
#define	fZero	f0	/* permanent floating point 0.0		*/
#define	fOne	f1	/* permanent floating point 1.0		*/
#define	fs0		f2	/* callee saves register			*/
#define	fs1		f3	/* callee saves register			*/
#define	fs2		f4	/* callee saves register			*/
#define	fs3		f5	/* callee saves register			*/
#define	ft0		f6	/* caller saves register			*/
#define	ft1		f7	/* caller saves register			*/
#define	fa0		f8	/* argument register 0				*/
#define	fa1		f9	/* argument register 1				*/
#define	fa2		f10	/* argument register 2				*/
#define	fa3		f11	/* argument register 3				*/
#define	fa4		f12	/* argument register 4				*/
#define	fa5		f13	/* argument register 5				*/
#define	fa6		f14	/* argument register 6				*/
#define	fa7		f15	/* argument register 7				*/
#define	fv0		f8	/* return value register 0			*/
#define	fv1		f9	/* return value register 1			*/
#define	fv2		f10	/* return value register 2			*/
#define	fv3		f11	/* return value register 3			*/
#define	fv4		f12	/* return value register 4			*/
#define	fv5		f13	/* return value register 5			*/
#define	fv6		f14	/* return value register 6			*/
#define	fv7		f15	/* return value register 7			*/
#define	fs4		f16	/* callee saves register			*/
#define	fs5		f17	/* callee saves register			*/
#define	fs6		f18	/* callee saves register			*/
#define	fs7		f19	/* callee saves register			*/
#define	fs8		f20	/* callee saves register			*/
#define	fs9		f21	/* callee saves register			*/
#define	fs10	f22	/* callee saves register			*/
#define	fs11	f23	/* callee saves register			*/
#define	fs12	f24	/* callee saves register			*/
#define	fs13	f25	/* callee saves register			*/
#define	fs14	f26	/* callee saves register			*/
#define	fs15	f27	/* callee saves register			*/
#define	fs16	f28	/* callee saves register			*/
#define	fs17	f29	/* callee saves register			*/
#define	fs18	f30	/* callee saves register			*/
#define	fs19	f31	/* callee saves register			*/

// ia64 predicate register definitions
#define	pone	p0	/* permanent one predicate			*/
#define	pTrue	p0	/* permanent one predicate			*/
#define	ps0		p1	/* callee saves predicate			*/
#define	ps1		p2	/* callee saves predicate			*/
#define	ps2		p3	/* callee saves predicate			*/
#define	ps3		p4	/* callee saves predicate			*/
#define	ps4		p5	/* callee saves predicate			*/
#define	pt0		p6	/* caller saves predicate			*/
#define	pt1		p7	/* caller saves predicate			*/
#define	pt2		p8	/* caller saves predicate			*/
#define	pt3		p9	/* caller saves predicate			*/
#define	pt4		p10	/* caller saves predicate			*/
#define	pt5		p11	/* caller saves predicate			*/
#define	pt6		p12	/* caller saves predicate			*/
#define	pt7		p13	/* caller saves predicate			*/
#define	pt8		p14	/* caller saves predicate			*/
#define	pt9		p15	/* caller saves predicate			*/

// ia64 branch register definitions
#define	rb		b0	/* return link						*/
#define	bs0		b1	/* callee saves branch register		*/
#define	bs1		b2	/* callee saves branch register		*/
#define	bs2		b3	/* callee saves branch register		*/
#define	bs3		b4	/* callee saves branch register		*/
#define	bs4		b5	/* callee saves branch register		*/
#define	bt0		b6	/* caller saves branch register		*/
#define	bt1		b7	/* caller saves branch register		*/

	.text

/*
 * Kernel-specific register aliases.
 *
 * Several aliases intentionally resolve to the same physical register
 * (for example Tmp1, NN0, nriCount and spillPtr2 are all t0; xPFS and
 * LCSave are both at0; Tmp3 and NN1 are both loc11).  Presumably their
 * live ranges never overlap - check every use before repurposing one.
 * Names of the form X[n] refer to the rotating register sets declared
 * with .rotr/.rotf further down.
 */
#define	CHARGE		t10
#define	FACTION		t9
#define	FActII		loc8
#define	FActIX		fs1
#define	FActIY		fs2
#define	FActIZ		fs3
#define	FIX			fs10
#define	FIY			fs11
#define	FIZ			fs12
#define	FSHIFT		t6
#define	FShiftIS	loc9
#define	FShiftX		fs7
#define	FShiftY		fs8
#define	FShiftZ		fs9
#define	ICharge		fs5
#define	InnerCnt	t17
#define	II			t13
#define	II3			in7
#define	IQ			fs5
#define	IS			t12
#define	IS3			in6
#define	IX			fa6
#define	IY			fa7
#define	IZ			fs4
#define	In_FSHIFT	in6
#define	In_GID		in7
#define	In_IINR		in1
#define	In_JINDEX	in2
#define	In_JJNR		in3
#define	In_NRI		in0
#define	In_SHIFT	in4
#define	In_SHIFTVEC	in5
#define NRI			loc12
#define IINR		loc13
#define JINDEX		loc14
#define JJNR		loc15
#define SHIFT		loc16
#define GID			loc17
#define COUNT		loc18
#define	JX			DX[0]
#define	JY			DY[0]
#define	JZ			DZ[0]
#define	LCSave		at0
#define	NJ0			t14
#define	NJ1			t15
#define	POSITION	t8
#define	PRSave		at1
#define	PosX		f88
#define	PosY		f89
#define	PosZ		f90
#define	QCharge		Charge[0]
#define	QQ			Charge[3]
#define	SHIFTVEC	t5
#define	VC			t11
#define	VCPtr		ggid
#define	VCTotal		fs0
#define VNBTotal    fs6
#define	VCoul		Charge[3]
#define	VCoul2		Charge[4]
#define R           RT[0]
#define Vvdw12		C12[2]
#define RInv12      RInv6[1]
#define	argPtr		loc23
#define	argPtr2		t4
#define	chargePtr	v2
#define	Tmp1		t0
#define	Tmp2		t17
#define	Tmp3		loc11
#define	Tmp4		t2
#define Tmp5		t3
#define	fHALF		ft0
#define	f3_8		ft1
#define f5_16		fa0
#define fSIX        fa1
#define fTWELVE     fa2
#define	fillP0		v0
#define	fillP1		v1
#define NN0			t0
#define NN1			loc11
#define	ggid		loc10
#define	gidPtr		t7
#define	iinrPtr		t1
#define	jindexPtr	t2
#define	jjnrPtr		t3
#define	jnr			t16
#define	jnr3		v0
#define	nriCount	t0
#define	pCont		pt0
#define	pDone		pt1
#define	pJJNR		pt2
#define	pMore		pt3
#define	pLast		pt4
#define	posPtr		v3
#define fTWO        fa3
#define Facel       fa4
#define Tabscale    fa5
#define	shX			fa6
#define	shY			fa7
#define	shZ			fs4
#define	shiftPtr	t4
#define	shiftVPtr	v1
#define	spillPtr	v0
#define	spillPtr2   t0
#define	xPFS		at0
#define TYPE        loc19
#define NTYPE       loc20
#define typePtr     loc21
#define NBFP		loc22
#define NTI     	loc24
#define VNBPtr      loc25
#define VFTab       loc26
#define nnn         loc27
#define Nouter      loc28
#define Ninner      loc29
#define OuterIter   loc30
#define InnerIter   loc31
#define VNB         loc32

/* Register stack frame layout handed to alloc and .regstk below. */
#define	_NINPUTS	8
#define	_NLOCALS	33
#define	_NOUTPUT	0
#define	_NROTATE	16

/* Biased register-format exponent for 2^n, as consumed by setf.exp. */
#define	EXP(n)					(0xffff + (n))

/*
 * Byte offsets from the stack pointer at entry at which the init code
 * below reads the arguments that were not passed in in0..in7.
 */
#define	POS_STK_OFFSET			0x10
#define	FACTION_STK_OFFSET		0x18
#define	CHARGE_STK_OFFSET		0x20
#define	FACEL_STK_OFFSET		0x28
#define	KRF_STK_OFFSET			0x30
#define	CRF_STK_OFFSET			0x38
#define	VC_STK_OFFSET			0x40
#define	TYPE_STK_OFFSET			0x48
#define	NTYPE_STK_OFFSET		0x50
#define	NBFP_STK_OFFSET			0x58
#define	VNB_STK_OFFSET			0x60
#define	TABSCALE_STK_OFFSET		0x68
#define	VFTAB_STK_OFFSET		0x70
#define	INVSQRTA_STK_OFFSET		0x78
#define	DVDA_STK_OFFSET			0x80
#define	GBTABSCALE_STK_OFFSET		0x88
#define	GBTAB_STK_OFFSET		0x90
#define	NTHREADS_STK_OFFSET		0x98
#define	COUNT_STK_OFFSET		0xA0
#define MTX_STK_OFFSET			0xA8
#define OUTERITER_STK_OFFSET		0xB0
#define INNERITER_STK_OFFSET		0xB8
#define WORK_STK_OFFSET     		0xC0

// Version without force calculation
	.regstk	8, 33, 0, 16
	.rotr   TypeJ[3]
	.rotf	DX[6], DY[6], DZ[6], Charge[7], RSqr[3], RInv[5], RInvT[2], RInvU[2], RInvErr[2], RInv6[2], C6[5], C12[5], Y[3], F[3], G[3], H[2], RT[2], n0[2], eps[3]
	.rotp	pPipe[7]
#define	PIPE_DEPTH	7

//	nb_kernel310nf_ia64_single - kernel entry and one-time initialisation.
//
//	Register inputs are in0..in7 (see the In_* aliases); the remaining
//	arguments are pointers read from the memory stack at the *_STK_OFFSET
//	offsets.  INIT 1-14 below:
//	  - allocate the register frame (8 in, 33 local, 0 out, 16 rotating);
//	  - spill fs0..fs12 at and below the entry sp and lower sp by 12*16;
//	  - save pr in PRSave and ar.lc in LCSave (LCSave shares at0 with xPFS,
//	    so the ar.pfs copy produced by alloc is overwritten in INIT 10);
//	  - copy the register inputs into temporaries and locals
//	    (NOTE(review): with 16 rotating registers the rotating region
//	    should cover in0-in7 and loc0-loc7, which is presumably why the
//	    inputs are moved out - confirm against the loop code);
//	  - build the FP constants 0.5, 3/8, 5/16, 2.0, 6.0 and 12.0;
//	  - load *facel into Facel and *tabscale into Tabscale, dereference
//	    the ntype pointer, and zero Ninner and Nouter.
	.global nb_kernel310nf_ia64_single
	.proc	nb_kernel310nf_ia64_single
	.align	32
nb_kernel310nf_ia64_single:
//	INIT 1
	{	.mmi
		alloc			xPFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE
		mov			spillPtr = sp				// first spill slot is the entry sp
		mov			Tmp1 = EXP(-1)				// exponent of 2^-1, used for fHALF
	}
	{	.mfi
		nop			0x0
		// Was "fadd fSIX = fSIX, fSIX", which read fSIX before anything
		// had written it.  Starting from 1.0 + 1.0 makes the chain
		// 2.0 -> 3.0 (INIT 4) -> 6.0 (INIT 7) -> fTWELVE = 12.0 (INIT 11).
		fadd			fSIX = fOne, fOne
		add			argPtr = FACEL_STK_OFFSET, sp
	} ;;
//	INIT 2
	{	.mii
		ld8			Ninner = [argPtr], COUNT_STK_OFFSET - FACEL_STK_OFFSET	// Ninner temporarily holds the facel pointer
		nop			0x0
		nop			0x0
	}
	{	.mii
		stf.spill		[spillPtr] = fs0, -16
		add			argPtr2 = TABSCALE_STK_OFFSET, sp	// reads sp before the adjustment below
		add			sp = -12 * 16, sp
	} ;;
//	INIT 3
	{	.mlx
		stf.spill		[spillPtr] = fs1, -16
		movl			Tmp3 = 0x3ec00000			// IEEE single bit pattern of 0.375 (3/8)
	}
	{	.mmi
		setf.exp		fHALF = Tmp1
		ld8			COUNT = [argPtr], POS_STK_OFFSET - COUNT_STK_OFFSET
		mov			PRSave	= pr
	} ;;
//	INIT 4
	{	.mlx
		ld8			POSITION = [argPtr], CHARGE_STK_OFFSET - POS_STK_OFFSET
		movl			Tmp4 = 0x3ea00000			// IEEE single bit pattern of 0.3125 (5/16)
	}
	{	.mfi
		stf.spill		[spillPtr] = fs2, -16
		fma			fSIX = fSIX, fOne, fOne //3.0
		mov			pr.rot	= 0x0
	} ;;
//  INIT 5
	{	.mmi
		ld8			CHARGE = [argPtr], FACTION_STK_OFFSET - CHARGE_STK_OFFSET
		setf.s			f3_8      = Tmp3
		mov			SHIFTVEC   = In_SHIFTVEC
	}
	{	.mmf
		stf.spill		[spillPtr] = fs3, -16
		ld4			NRI = [In_NRI]				// in0 is a pointer; NRI gets the value
		fadd			fTWO = fOne, fOne
	} ;;
//  INIT 6
	{	.mmi
		ld8			FACTION = [argPtr], VC_STK_OFFSET - FACTION_STK_OFFSET
		setf.s			f5_16 = Tmp4
		mov			FSHIFT   = In_FSHIFT
	}
	{	.mmi
		stf.spill		[spillPtr] = fs4, -16
		ld8			Nouter = [argPtr2]			// Nouter temporarily holds the tabscale pointer
		mov			GID = In_GID
	} ;;
//  INIT 7
	{	.mmf
		ld8			VC = [argPtr], NTYPE_STK_OFFSET - VC_STK_OFFSET
		mov			SHIFT = In_SHIFT
		fadd			fSIX = fSIX, fSIX			// 6.0
	}
	{	.mmi
		stf.spill		[spillPtr] = fs5, -16
		ldfs			Facel = [Ninner]
		add			spillPtr2 = 32, sp			// second spill pointer, relative to the lowered sp
	} ;;
//  INIT 8
	{	.mmf
		ld8			NTYPE = [argPtr], TYPE_STK_OFFSET - NTYPE_STK_OFFSET	// pointer for now, dereferenced in INIT 11
		mov			JJNR = In_JJNR
		fnorm			f3_8 = f3_8
	}
	{	.mii
		stf.spill		[spillPtr] = fs6, -16
		mov			IINR = In_IINR
		mov			JINDEX = In_JINDEX
	} ;;
//  INIT 9
	{	.mmf
		ld8			TYPE = [argPtr], NBFP_STK_OFFSET - TYPE_STK_OFFSET
		stf.spill		[spillPtr] = fs7, -16
		fnorm			f5_16 = f5_16
	} ;;
//  INIT 10
	{	.mmi
		ld8			NBFP = [argPtr], VFTAB_STK_OFFSET - NBFP_STK_OFFSET
		stf.spill		[spillPtr] = fs8, -16
		mov			LCSave = ar.lc				// overwrites xPFS (same register, at0)
	} ;;
//  INIT 11
	{	.mmf
		ld8			VFTab = [argPtr], VNB_STK_OFFSET - VFTAB_STK_OFFSET
		stf.spill		[spillPtr] = fs9, -32			// skip the slot written through spillPtr2
		fadd			fTWELVE = fSIX, fSIX			// 12.0
	}
	{	.mmi
		stf.spill		[spillPtr2] = fs10, -32
		ld4			NTYPE = [NTYPE]				// NTYPE now holds the value
		nop			0x0
	} ;;
//  INIT 12
	{	.mfi
		stf.spill		[spillPtr] = fs11
		fnorm			fHALF = fHALF
		nop			0x0
	}
	{	.mmf
		ld8			VNB = [argPtr], OUTERITER_STK_OFFSET - VNB_STK_OFFSET
		stf.spill		[spillPtr2] = fs12
		fnorm			fTWO = fTWO
	} ;;
//  INIT 13
	{	.mmi
		ld8			OuterIter = [argPtr], INNERITER_STK_OFFSET - OUTERITER_STK_OFFSET
		ldfs			Tabscale = [Nouter]
		mov			Ninner = 0				// facel pointer no longer needed
	} ;;
//  INIT 14
	{	.mfi
		ld8			InnerIter = [argPtr]
		fnorm			Facel = Facel
		mov			Nouter = 0				// tabscale pointer no longer needed
	} ;;
//  24 bundles used for init - still aligned.
threadLoop_nf://  THREAD PROLOGUE 1		{	.mfi				fetchadd4.rel	NN0 = [COUNT], THREAD_CHUNK_SIZE		nop				0x0		nop				0x0	}	{	.mfi		setf.sig		f33 = NTYPE		nop				0x0

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?