nb_kernel100nf_ia64_double.s

来自「最著名最快的分子模拟软件」· S 代码 · 共 786 行 · 第 1/2 页

S
786
字号
/*
 * $Id: nb_kernel100nf_ia64_double.S,v 1.2 2005/01/25 12:11:51 lindahl Exp $
 *
 * Gromacs 4.0                         Copyright (c) 1991-2003
 * David van der Spoel, Erik Lindahl, University of Groningen.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org
 *
 * And Hey:
 * Gnomes, ROck Monsters And Chili Sauce
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

/*
 * The ia64-assembly Gromacs inner loops would not have been
 * possible without a lot of support and optimization suggestions
 * from John Worley at Hewlett-Packard.
 */

/*
 * NOTE: this file must be run through the C preprocessor before assembly;
 * each directive below has to start on its own line.
 */

#define THREAD_CHUNK_SIZE       8
#define JJNR_PREFETCH_DISTANCE  128

/* ------------------------------------------------------------------ */
/* Symbolic names for the static general registers r0-r31              */
/* ------------------------------------------------------------------ */
#define zero    r0      /* permanent zero                       */
#define gp      r1      /* global data pointer                  */
#define at0     r2      /* temp, target of addi                 */
#define at1     r3      /* temp, target of addi                 */
#define S0      r4      /* callee saves register                */
#define S1      r5      /* callee saves register                */
#define S2      r6      /* callee saves register                */
#define S3      r7      /* callee saves register                */
#define v0      r8      /* 1st fixed point return value/ptr     */
#define v1      r9      /* 2nd fixed return value/ptr           */
#define v2      r10     /* 3rd fixed return value/ptr           */
#define v3      r11     /* 4th fixed return value/ptr           */
#define sp      r12     /* memory stack pointer                 */
#define tp      r13     /* thread pointer                       */
#define t0      r14     /* caller saves register                */
#define t1      r15     /* caller saves register                */
#define t2      r16     /* caller saves register                */
#define t3      r17     /* caller saves register                */
#define t4      r18     /* caller saves register                */
#define t5      r19     /* caller saves register                */
#define t6      r20     /* caller saves register                */
#define t7      r21     /* caller saves register                */
#define t8      r22     /* caller saves register                */
#define t9      r23     /* caller saves register                */
#define t10     r24     /* caller saves register                */
#define t11     r25     /* caller saves register                */
#define t12     r26     /* caller saves register                */
#define t13     r27     /* caller saves register                */
#define t14     r28     /* caller saves register                */
#define t15     r29     /* caller saves register                */
#define t16     r30     /* caller saves register                */
#define t17     r31     /* caller saves register                */

/* ------------------------------------------------------------------ */
/* Symbolic names for the floating point registers f0-f31              */
/* (fa0-fa7 and fv0-fv7 name the same registers f8-f15)                */
/* ------------------------------------------------------------------ */
#define fZero   f0      /* permanent floating point 0.0         */
#define fOne    f1      /* permanent floating point 1.0         */
#define fs0     f2      /* callee saves register                */
#define fs1     f3      /* callee saves register                */
#define fs2     f4      /* callee saves register                */
#define fs3     f5      /* callee saves register                */
#define ft0     f6      /* caller saves register                */
#define ft1     f7      /* caller saves register                */
#define fa0     f8      /* argument register 0                  */
#define fa1     f9      /* argument register 1                  */
#define fa2     f10     /* argument register 2                  */
#define fa3     f11     /* argument register 3                  */
#define fa4     f12     /* argument register 4                  */
#define fa5     f13     /* argument register 5                  */
#define fa6     f14     /* argument register 6                  */
#define fa7     f15     /* argument register 7                  */
#define fv0     f8      /* return value register 0              */
#define fv1     f9      /* return value register 1              */
#define fv2     f10     /* return value register 2              */
#define fv3     f11     /* return value register 3              */
#define fv4     f12     /* return value register 4              */
#define fv5     f13     /* return value register 5              */
#define fv6     f14     /* return value register 6              */
#define fv7     f15     /* return value register 7              */
#define fs4     f16     /* callee saves register                */
#define fs5     f17     /* callee saves register                */
#define fs6     f18     /* callee saves register                */
#define fs7     f19     /* callee saves register                */
#define fs8     f20     /* callee saves register                */
#define fs9     f21     /* callee saves register                */
#define fs10    f22     /* callee saves register                */
#define fs11    f23     /* callee saves register                */
#define fs12    f24     /* callee saves register                */
#define fs13    f25     /* callee saves register                */
#define fs14    f26     /* callee saves register                */
#define fs15    f27     /* callee saves register                */
#define fs16    f28     /* callee saves register                */
#define fs17    f29     /* callee saves register                */
#define fs18    f30     /* callee saves register                */
#define fs19    f31     /* callee saves register                */

/* ------------------------------------------------------------------ */
/* Symbolic names for the predicate registers p0-p15                   */
/* ------------------------------------------------------------------ */
#define pone    p0      /* permanent one predicate              */
#define pTrue   p0      /* permanent one predicate              */
#define ps0     p1      /* callee saves predicate               */
#define ps1     p2      /* callee saves predicate               */
#define ps2     p3      /* callee saves predicate               */
#define ps3     p4      /* callee saves predicate               */
#define ps4     p5      /* callee saves predicate               */
#define pt0     p6      /* caller saves predicate               */
#define pt1     p7      /* caller saves predicate               */
#define pt2     p8      /* caller saves predicate               */
#define pt3     p9      /* caller saves predicate               */
#define pt4     p10     /* caller saves predicate               */
#define pt5     p11     /* caller saves predicate               */
#define pt6     p12     /* caller saves predicate               */
#define pt7     p13     /* caller saves predicate               */
#define pt8     p14     /* caller saves predicate               */
#define pt9     p15     /* caller saves predicate               */

/* ------------------------------------------------------------------ */
/* Symbolic names for the branch registers b0-b7                       */
/* ------------------------------------------------------------------ */
#define rb      b0      /* return link                          */
#define bs0     b1      /* callee saves branch register         */
#define bs1     b2      /* callee saves branch register         */
#define bs2     b3      /* callee saves branch register         */
#define bs3     b4      /* callee saves branch register         */
#define bs4     b5      /* callee saves branch register         */
#define bt0     b6      /* caller saves branch register         */
#define bt1     b7      /* caller saves branch register         */

        .text

/* ------------------------------------------------------------------ */
/* Kernel-specific register assignments.                               */
/* Several names deliberately expand to the same register (e.g.        */
/* xPFS/LCSave -> at0, Tmp2/InnerCnt -> t17, IS3/In_FSHIFT -> in6,      */
/* II3/In_GID -> in7, fillP0/jnr3/spillPtr -> v0); only one of the      */
/* aliases can hold a live value at any given point.                   */
/* ------------------------------------------------------------------ */
#define CHARGE          t10
#define FACTION         t9
#define FActII          loc0
#define FActIX          fs1
#define FActIY          fs2
#define FActIZ          fs3
#define FIX             fa0
#define FIY             fa1
#define FIZ             fa2
#define FSHIFT          t6
#define FShiftIS        loc1
#define FShiftX         fs4
#define FShiftY         fs5
#define FShiftZ         fs6
#define Facel           fa7
#define ICharge         fa6
#define InnerCnt        t17
#define II              t13
#define II3             in7
#define IQ              fa6
#define IS              t12
#define IS3             in6
#define IX              fa3
#define IY              fa4
#define IZ              fa5

/* Incoming register arguments in0-in7 */
#define In_FSHIFT       in6
#define In_GID          in7
#define In_IINR         in1
#define In_JINDEX       in2
#define In_JJNR         in3
#define In_NRI          in0
#define In_SHIFT        in4
#define In_SHIFTVEC     in5

/* Stacked local registers */
#define NRI             loc4
#define IINR            loc5
#define JINDEX          loc6
#define JJNR            loc7
#define SHIFT           loc8
#define GID             loc9
#define COUNT           loc10

/* Element 0 of the rotating DX/DY/DZ register arrays */
#define JX              DX[0]
#define JY              DY[0]
#define JZ              DZ[0]

#define LCSave          at0
#define NJ0             t14
#define NJ1             t15
#define POSITION        t8
#define PRSave          at1
#define PosX            f88
#define PosY            f89
#define PosZ            f90
#define QCharge         Charge[0]
#define QQ              Charge[4]
#define RInvSqr         f87
#define SHIFTVEC        t5
#define VCPtr           ggid
#define VCTotal         fs0
#define VCoul           Charge[4]
#define argPtr          t11
#define chargePtr       v2
#define Tmp1            t0
#define Tmp2            t17
#define Tmp3            loc3
#define fHALF           ft0
#define f3_8            ft1
#define f5_16           fs7
#define fillP0          v0
#define fillP1          v1
#define NN0             t0
#define NN1             loc3
#define ggid            loc2
#define gidPtr          t7
#define iinrPtr         t1
#define jindexPtr       t2
#define jjnrPtr         t3
#define jnr             t16
#define jnr3            v0
#define nriCount        t0
#define pCont           pt0
#define pDone           pt1
#define pJJNR           pt2
#define pMore           pt3
#define pLast           pt4
#define posPtr          v3
#define shX             fa3
#define shY             fa4
#define shZ             fa5
#define shiftPtr        t4
#define shiftVPtr       v1
#define spillPtr        v0
#define xPFS            at0
#define Nouter          loc19
#define Ninner          loc20
#define OuterIter       loc21
#define InnerIter       loc22
#define VC              loc23

/* EXP(n) = 0xffff + n; the result is fed to setf.exp by the kernel prologue */
#define EXP(n)                  (0xffff + (n))

/* ------------------------------------------------------------------ */
/* Byte offsets, applied relative to sp on entry, at which the kernel  */
/* finds its memory-passed arguments (8 bytes per slot).               */
/* ------------------------------------------------------------------ */
#define POS_STK_OFFSET          0x10
#define FACTION_STK_OFFSET      0x18
#define CHARGE_STK_OFFSET       0x20
#define FACEL_STK_OFFSET        0x28
#define KRF_STK_OFFSET          0x30
#define CRF_STK_OFFSET          0x38
#define VC_STK_OFFSET           0x40
#define TYPE_STK_OFFSET         0x48
#define NTYPE_STK_OFFSET        0x50
#define NBFP_STK_OFFSET         0x58
#define VNB_STK_OFFSET          0x60
#define TABSCALE_STK_OFFSET     0x68
#define VFTAB_STK_OFFSET        0x70
#define INVSQRTA_STK_OFFSET     0x78
#define DVDA_STK_OFFSET         0x80
#define GBTABSCALE_STK_OFFSET   0x88
#define GBTAB_STK_OFFSET        0x90
#define NTHREADS_STK_OFFSET     0x98
#define COUNT_STK_OFFSET        0xA0
#define MTX_STK_OFFSET          0xA8
#define OUTERITER_STK_OFFSET    0xB0
#define INNERITER_STK_OFFSET    0xB8
#define WORK_STK_OFFSET         0xC0
//
Version without force calculation#define	_NINPUTS	8#define	_NLOCALS	24#define	_NOUTPUT	0#define	_NROTATE	8	.regstk	8, 24, 0, 8	.rotr	FActPtr[8]	.rotf	DX[8], DY[8], DZ[8], FActX[8], FActY[8], FActZ[8], Charge[8], RSqr[5], RInv[5], RInvT[3], RInvU[2], RInvErr[4]	.rotp	pPipe[8], pSQRT[3]#define	PIPE_DEPTH	8	.global nb_kernel100nf_ia64_double	.proc	nb_kernel100nf_ia64_double	.align	32nb_kernel100nf_ia64_double://	INIT 1	{	.mmi		alloc			xPFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE		mov				spillPtr = sp		mov				Tmp1 = EXP(-1)	}	{	.mfi		nop				0x0		nop				0x0		add				argPtr = FACEL_STK_OFFSET, sp	} ;;//	INIT 2	{	.mlx											ld8			Ninner = [argPtr], COUNT_STK_OFFSET - FACEL_STK_OFFSET		movl			Tmp2 = 0x3ec00000	}	{	.mii		stf.spill		[spillPtr] = fs0, -16		mov				LCSave = ar.lc		add				sp = -7 * 16, sp	} ;;//	INIT 3		{	.mlx											stf.spill		[spillPtr] = fs1, -16		movl			Tmp3 = 0x3ea00000	} 	{	.mmi										setf.exp		fHALF   = Tmp1		ld8				COUNT = [argPtr], POS_STK_OFFSET - COUNT_STK_OFFSET		mov				PRSave	= pr	} ;;//	INIT 4		{	.mmi											setf.d 			f3_8  = Tmp2		stf.spill		[spillPtr] = fs2, -16		mov				pr.rot	= 0x0	} ;;//  INIT 5		{	.mmi										stf.spill		[spillPtr] = fs3, -16		nop			0x0		mov			SHIFTVEC   = In_SHIFTVEC	} ;;//  INIT 6		{	.mmi		stf.spill		[spillPtr] = fs4, -16		ld8				POSITION = [argPtr], CHARGE_STK_OFFSET - POS_STK_OFFSET		mov				FSHIFT   = In_FSHIFT	} ;;//  INIT 7		{	.mmi		stf.spill		[spillPtr] = fs5, -16		ld8				CHARGE = [argPtr], FACTION_STK_OFFSET - CHARGE_STK_OFFSET		mov			SHIFT = In_SHIFT	}	{	.mmi		ldfd			Facel = [Ninner]		ld4			NRI = [In_NRI]		mov			GID = In_GID	} ;;//  INIT 8		{	.mmf		stf.spill		[spillPtr] = fs6, -16		ld8			FACTION = [argPtr], VC_STK_OFFSET - FACTION_STK_OFFSET		fnorm			Facel = Facel	}		{	.mii		mov			JJNR = In_JJNR		mov			IINR = In_IINR		mov			JINDEX = In_JINDEX	} ;;//  INIT 9			{	.mmf		stf.spill		[spillPtr] = fs7		ld8			VC = [argPtr], OUTERITER_STK_OFFSET - VC_STK_OFFSET		fnorm			f3_8 = f3_8	} ;;//  INIT 10	{	.mmi		
ld8			OuterIter = [argPtr], INNERITER_STK_OFFSET - OUTERITER_STK_OFFSET                setf.d                  f5_16      = Tmp3		mov			Nouter = 0	} ;;//  INIT 11	{	.mfi		ld8			InnerIter = [argPtr]                fnorm                   f5_16 = f5_16		mov			Ninner = 0	} ;;//  16 bundles used for init - still aligned.	threadLoop_nf:

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?