nb_kernel110nf_ia64_single.s

来自「最著名最快的分子模拟软件」· S 代码 · 共 888 行 · 第 1/2 页

S
888
字号
/*
 * $Id: nb_kernel110nf_ia64_single.S,v 1.2 2005/01/25 12:11:51 lindahl Exp $
 *
 * Gromacs 4.0                         Copyright (c) 1991-2003
 * David van der Spoel, Erik Lindahl, University of Groningen.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org
 *
 * And Hey:
 * Gnomes, ROck Monsters And Chili Sauce
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

/*
 * The ia64-assembly Gromacs inner loops would not have been
 * possible without a lot of support and optimization suggestions
 * from John Worley at Hewlett-Packard.
 */

/* Each thread locks a counter and grabs a couple of neighborlists.
 * Available sizes for this chunk: 1,2,4,8, or 16
 */
#define THREAD_CHUNK_SIZE       8
#define JJNR_PREFETCH_DISTANCE  128

//	ia64 general register definitions
#define	zero	r0	/* permanent zero					*/
#define	gp		r1	/* global data pointer				*/
#define	at0		r2	/* temp, target of addi				*/
#define	at1		r3	/* temp, target of addi				*/
#define	S0		r4	/* callee saves register			*/
#define	S1		r5	/* callee saves register			*/
#define	S2		r6	/* callee saves register			*/
#define	S3		r7	/* callee saves register			*/
#define	v0		r8	/* 1st fixed point return value/ptr	*/
#define	v1		r9	/* 2nd fixed return value/ptr		*/
#define	v2		r10	/* 3rd fixed return value/ptr		*/
#define	v3		r11	/* 4th fixed return value/ptr		*/
#define	sp		r12	/* memory stack pointer				*/
#define	tp		r13	/* thread pointer					*/
#define	t0		r14	/* caller saves register			*/
#define	t1		r15	/* caller saves register			*/
#define	t2		r16	/* caller saves register			*/
#define	t3		r17	/* caller saves register			*/
#define	t4		r18	/* caller saves register			*/
#define	t5		r19	/* caller saves register			*/
#define	t6		r20	/* caller saves register			*/
#define	t7		r21	/* caller saves register			*/
#define	t8		r22	/* caller saves register			*/
#define	t9		r23	/* caller saves register			*/
#define	t10		r24	/* caller saves register			*/
#define	t11		r25	/* caller saves register			*/
#define	t12		r26	/* caller saves register			*/
#define	t13		r27	/* caller saves register			*/
#define	t14		r28	/* caller saves register			*/
#define	t15		r29	/* caller saves register			*/
#define	t16		r30	/* caller saves register			*/
#define	t17		r31	/* caller saves register			*/

//	ia64 Floating-point register definitions
#define	fZero	f0	/* permanent floating point 0.0		*/
#define	fOne	f1	/* permanent floating point 1.0		*/
#define	fs0		f2	/* callee saves register			*/
#define	fs1		f3	/* callee saves register			*/
#define	fs2		f4	/* callee saves register			*/
#define	fs3		f5	/* callee saves register			*/
#define	ft0		f6	/* caller saves register			*/
#define	ft1		f7	/* caller saves register			*/
#define	fa0		f8	/* argument register 0				*/
#define	fa1		f9	/* argument register 1				*/
#define	fa2		f10	/* argument register 2				*/
#define	fa3		f11	/* argument register 3				*/
#define	fa4		f12	/* argument register 4				*/
#define	fa5		f13	/* argument register 5				*/
#define	fa6		f14	/* argument register 6				*/
#define	fa7		f15	/* argument register 7				*/
#define	fv0		f8	/* return value register 0			*/
#define	fv1		f9	/* return value register 1			*/
#define	fv2		f10	/* return value register 2			*/
#define	fv3		f11	/* return value register 3			*/
#define	fv4		f12	/* return value register 4			*/
#define	fv5		f13	/* return value register 5			*/
#define	fv6		f14	/* return value register 6			*/
#define	fv7		f15	/* return value register 7			*/
#define	fs4		f16	/* callee saves register			*/
#define	fs5		f17	/* callee saves register			*/
#define	fs6		f18	/* callee saves register			*/
#define	fs7		f19	/* callee saves register			*/
#define	fs8		f20	/* callee saves register			*/
#define	fs9		f21	/* callee saves register			*/
#define	fs10	f22	/* callee saves register			*/
#define	fs11	f23	/* callee saves register			*/
#define	fs12	f24	/* callee saves register			*/
#define	fs13	f25	/* callee saves register			*/
#define	fs14	f26	/* callee saves register			*/
#define	fs15	f27	/* callee saves register			*/
#define	fs16	f28	/* callee saves register			*/
#define	fs17	f29	/* callee saves register			*/
#define	fs18	f30	/* callee saves register			*/
#define	fs19	f31	/* callee saves register			*/

// ia64 predicate register definitions
#define	pone	p0	/* permanent one predicate			*/
#define	pTrue	p0	/* permanent one predicate			*/
#define	ps0		p1	/* callee saves predicate			*/
#define	ps1		p2	/* callee saves predicate			*/
#define	ps2		p3	/* callee saves predicate			*/
#define	ps3		p4	/* callee saves predicate			*/
#define	ps4		p5	/* callee saves predicate			*/
#define	pt0		p6	/* caller saves predicate			*/
#define	pt1		p7	/* caller saves predicate			*/
#define	pt2		p8	/* caller saves predicate			*/
#define	pt3		p9	/* caller saves predicate			*/
#define	pt4		p10	/* caller saves predicate			*/
#define	pt5		p11	/* caller saves predicate			*/
#define	pt6		p12	/* caller saves predicate			*/
#define	pt7		p13	/* caller saves predicate			*/
#define	pt8		p14	/* caller saves predicate			*/
#define	pt9		p15	/* caller saves predicate			*/

// ia64 branch register definitions
#define	rb		b0	/* return link						*/
#define	bs0		b1	/* callee saves branch register		*/
#define	bs1		b2	/* callee saves branch register		*/
#define	bs2		b3	/* callee saves branch register		*/
#define	bs3		b4	/* callee saves branch register		*/
#define	bs4		b5	/* callee saves branch register		*/
#define	bt0		b6	/* caller saves branch register		*/
#define	bt1		b7	/* caller saves branch register		*/

	.text

/*
 * Kernel-specific names for registers. Several names deliberately share
 * one physical register (for example Tmp1, NN0, nriCount and spillPtr2 are
 * all t0; LCSave and xPFS are both at0), so they cannot all be live at once.
 */
#define	CHARGE		t10
#define	FACTION		t9
#define	FActII		loc8
#define	FActIX		fs1
#define	FActIY		fs2
#define	FActIZ		fs3
#define	FIX			fs8
#define	FIY			fs9
#define	FIZ			fs10
#define	FSHIFT		t6
#define	FShiftIS	loc9
#define	FShiftX		fs5
#define	FShiftY		fs6
#define	FShiftZ		fs7
#define	Facel		fa3
#define	ICharge		fa7
#define	InnerCnt	t17
#define	II			t13
#define	II3			in7
#define	IQ			fa7
#define	IS			t12
#define	IS3			in6
#define	IX			fa4
#define	IY			fa5
#define	IZ			fa6
#define	In_FSHIFT	in6
#define	In_GID		in7
#define	In_IINR		in1
#define	In_JINDEX	in2
#define	In_JJNR		in3
#define	In_NRI		in0
#define	In_SHIFT	in4
#define	In_SHIFTVEC	in5
#define NRI			loc12
#define IINR		loc13
#define JINDEX		loc14
#define JJNR		loc15
#define SHIFT		loc16
#define GID			loc17
#define COUNT		loc18
#define	JX			DX[0]
#define	JY			DY[0]
#define	JZ			DZ[0]
#define	LCSave		at0
#define	NJ0			t14
#define	NJ1			t15
#define	POSITION	t8
#define	PRSave		at1
#define	PosX		f88
#define	PosY		f89
#define	PosZ		f90
#define	QCharge		Charge[0]
#define	QQ			Charge[3]
#define	SHIFTVEC	t5
#define	VC			t11
#define	VCPtr		ggid
#define	VCTotal		fs0
#define VNBTotal    fs4
#define	VCoul		Charge[3]
#define	VCoul2		Charge[4]
#define Vvdw6		C6[2]
#define Vvdw12		C12[2]
#define RInv12      RInv6[1]
#define	argPtr		loc23
#define	chargePtr	v2
#define	Tmp1		t0
#define	Tmp2		t17
#define	Tmp3		loc11
#define	Tmp4		t2
#define	fHALF		ft0
#define	f3_8		ft1
#define f5_16		fa0
#define fSIX        fa1
#define fTWELVE     fa2
#define	fillP0		v0
#define	fillP1		v1
#define NN0			t0
#define NN1			loc11
#define	ggid		loc10
#define	gidPtr		t7
#define	iinrPtr		t1
#define	jindexPtr	t2
#define	jjnrPtr		t3
#define	jnr			t16
#define	jnr3		v0
#define	nriCount	t0
#define	pCont		pt0
#define	pDone		pt1
#define	pJJNR		pt2
#define	pMore		pt3
#define	pLast		pt4
#define	posPtr		v3
#define	shX			fa4
#define	shY			fa5
#define	shZ			fa6
#define	shiftPtr	t4
#define	shiftVPtr	v1
#define	spillPtr	v0
#define	spillPtr2   t0
#define	xPFS		at0
#define TYPE        loc19
#define NTYPE       loc20
#define typePtr     loc21
#define NBFP		loc22
#define NTI     	loc24
#define VNBPtr      loc25
#define Nouter      loc26
#define Ninner      loc27
#define OuterIter   loc28
#define InnerIter   loc29
#define VNB         loc30
#define pFacel      loc31

/* Register stack frame shape; must agree with the .regstk directive below. */
#define	_NINPUTS	8
#define	_NLOCALS	32
#define	_NOUTPUT	0
#define	_NROTATE	16

/* Biased exponent for 2^n, in the form consumed by setf.exp. */
#define	EXP(n)					(0xffff + (n))

/*
 * Byte offsets, relative to the stack pointer on entry, of the arguments
 * that are not passed in the eight input registers.
 */
#define	POS_STK_OFFSET			0x10
#define	FACTION_STK_OFFSET		0x18
#define	CHARGE_STK_OFFSET		0x20
#define	FACEL_STK_OFFSET		0x28
#define	KRF_STK_OFFSET			0x30
#define	CRF_STK_OFFSET			0x38
#define	VC_STK_OFFSET			0x40
#define	TYPE_STK_OFFSET			0x48
#define	NTYPE_STK_OFFSET		0x50
#define	NBFP_STK_OFFSET			0x58
#define	VNB_STK_OFFSET			0x60
#define	TABSCALE_STK_OFFSET		0x68
#define	VFTAB_STK_OFFSET		0x70
#define	INVSQRTA_STK_OFFSET		0x78
#define	DVDA_STK_OFFSET			0x80
#define	GBTABSCALE_STK_OFFSET		0x88
#define	GBTAB_STK_OFFSET		0x90
#define	NTHREADS_STK_OFFSET		0x98
#define	COUNT_STK_OFFSET		0xA0
#define MTX_STK_OFFSET			0xA8
#define OUTERITER_STK_OFFSET		0xB0
#define INNERITER_STK_OFFSET		0xB8
#define WORK_STK_OFFSET     		0xC0

/*
 * nb_kernel110nf_ia64_single -- entry point and register set-up (INIT 1-14).
 *
 * Only the prologue, up to the threadLoop_nf label, is present in this part
 * of the file. The bundles below:
 *   - allocate a register frame of 8 inputs, 32 locals, 0 outputs and
 *     16 rotating registers;
 *   - copy ar.lc and pr into LCSave / PRSave and clear the rotating
 *     predicates;
 *   - spill fs0..fs10 to the memory stack, the first at the incoming sp and
 *     each following one 16 bytes lower, and lower sp by 10 * 16 bytes;
 *   - copy inputs in1..in7 into IINR, JINDEX, JJNR, SHIFT, SHIFTVEC, FSHIFT
 *     and GID, and load the 32-bit value at In_NRI into NRI;
 *   - step argPtr through the *_STK_OFFSET slots (relative to the incoming
 *     sp) to load pFacel, COUNT, POSITION, CHARGE, FACTION, VC, NTYPE, TYPE,
 *     NBFP, VNB, OuterIter and InnerIter;
 *   - build the floating-point constants fHALF, fSIX, f3_8, f5_16 and
 *     fTWELVE, load Facel through pFacel, and zero Ninner.
 */
	.regstk	8, 32, 0, 16
	.rotr	TypeJ[4]
	.rotf	DX[6], DY[6], DZ[6], Charge[8], RSqr[2], RInv[4], RInvT[2], RInvU[2], RInvErr[2], RInv2[3], RInv6[3], C6[4], C12[5]
	.rotp	pPipe[8]

#define	PIPE_DEPTH	8

	.global nb_kernel110nf_ia64_single
	.proc	nb_kernel110nf_ia64_single
	.align	32
nb_kernel110nf_ia64_single:
//	INIT 1
	{	.mmi
		alloc			xPFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE
		mov				spillPtr = sp
		mov				Tmp1 = EXP(-1)		// biased exponent of 2^-1, used for fHALF
	}
	{	.mfi
		nop				0x0
		nop				0x0
		add				argPtr = FACEL_STK_OFFSET, sp
	} ;;
//	INIT 2
//	NOTE(review): LCSave and xPFS both alias at0 (r2), so the ar.pfs copy
//	written by alloc in INIT 1 is overwritten by the ar.lc copy here.
//	Presumably harmless if nothing later reads xPFS - confirm against the
//	epilogue, which is not part of this section.
	{	.mlx
		ld8			pFacel = [argPtr], COUNT_STK_OFFSET - FACEL_STK_OFFSET
		movl			Tmp2 = 0x40c00000   // 6.0
	}
	{	.mii
		stf.spill		[spillPtr] = fs0, -16
		mov				LCSave = ar.lc
		add				sp = -10 * 16, sp
	} ;;
//	INIT 3
	{	.mlx
		stf.spill		[spillPtr] = fs1, -16
		movl			Tmp3 = 0x3ec00000	// 0.375 (3/8) as an IEEE single bit pattern
	}
	{	.mmi
		setf.exp		fHALF = Tmp1
		ld8				COUNT = [argPtr], POS_STK_OFFSET - COUNT_STK_OFFSET
		mov				PRSave	= pr
	} ;;
//	INIT 4
	{	.mlx
		ld8				POSITION = [argPtr], CHARGE_STK_OFFSET - POS_STK_OFFSET
		movl			Tmp4 = 0x3ea00000	// 0.3125 (5/16) as an IEEE single bit pattern
	}
	{	.mmi
		setf.s			fSIX  = Tmp2
		stf.spill		[spillPtr] = fs2, -16
		mov				pr.rot	= 0x0
	} ;;
//  INIT 5
	{	.mmi
		ld8			CHARGE = [argPtr], FACTION_STK_OFFSET - CHARGE_STK_OFFSET
		setf.s			f3_8      = Tmp3
		mov			SHIFTVEC   = In_SHIFTVEC
	}
	{	.mmi
		stf.spill		[spillPtr] = fs3, -16
		ldfs			Facel = [pFacel]
		nop			0x0
	} ;;
//  INIT 6
	{	.mmi
		ld8				FACTION = [argPtr], VC_STK_OFFSET - FACTION_STK_OFFSET
		setf.s			f5_16 = Tmp4
		mov				FSHIFT   = In_FSHIFT
	}
	{	.mfi
		stf.spill		[spillPtr] = fs4, -16
		nop			0x0
		mov			GID = In_GID
	} ;;
//  INIT 7
	{	.mmf
		ld8			VC = [argPtr], NTYPE_STK_OFFSET - VC_STK_OFFSET
		mov			SHIFT = In_SHIFT
		nop			0x0
	}
	{	.mmf
		stf.spill		[spillPtr] = fs5, -16
		ld4			NRI = [In_NRI]
		fnorm 			fSIX = fSIX
	} ;;
//  INIT 8
//	NTYPE first receives the pointer stored in the argument slot; INIT 13
//	replaces it with the 32-bit value it points to.
	{	.mmf
		ld8			NTYPE = [argPtr], TYPE_STK_OFFSET - NTYPE_STK_OFFSET
		mov			JJNR = In_JJNR
		fnorm			f3_8 = f3_8
	}
	{	.mii
		stf.spill		[spillPtr] = fs6, -16
		mov			IINR = In_IINR
		mov			JINDEX = In_JINDEX
	} ;;
//  INIT 9
	{	.mmf
		ld8			TYPE = [argPtr], NBFP_STK_OFFSET - TYPE_STK_OFFSET
		stf.spill		[spillPtr] = fs7, -16
		fnorm			f5_16 = f5_16
	} ;;
//  INIT 10
	{	.mmi
		ld8			NBFP = [argPtr], VNB_STK_OFFSET - NBFP_STK_OFFSET
		stf.spill		[spillPtr] = fs8, -16
		nop			0x0
	} ;;
//  INIT 11
	{	.mmf
		ld8			VNB = [argPtr],	OUTERITER_STK_OFFSET - VNB_STK_OFFSET
		stf.spill		[spillPtr] = fs9, -16
		fadd			fTWELVE = fSIX, fSIX
	} ;;
//  INIT 12
//	Last spill: no post-decrement, so fs10 lands at the new sp.
	{	.mfi
		stf.spill		[spillPtr] = fs10
		fnorm			fHALF = fHALF
		nop			0x0
	} ;;
//  INIT 13
	{	.mmf
		ld8			OuterIter = [argPtr], INNERITER_STK_OFFSET - OUTERITER_STK_OFFSET
		ld4			NTYPE = [NTYPE]
		fnorm			Facel = Facel
	} ;;
//  INIT 14
	{	.mfi
		ld8			InnerIter = [argPtr]
		nop			0x0
		mov			Ninner = 0
	} ;;
//  22 bundles used for init - still aligned.
threadLoop_nf:

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?