📄 r_sqrt_.s

📁 操作系统SunOS 4.1.3版本的源码
💻 S
字号:
	.seg	"data"	.asciz	"@(#)r_sqrt_.S 1.1 92/07/30 SMI"#define LOCORE#include <machine/asm_linkage.h>! Copyright (c) 1988 by Sun Microsystems, Inc.!! Fortran single _r_sqrt_(x) ! Code by K.C. Ng, Dec 2, 1986, revised on May 13 (Friday), 1988.!! _r_sqrt_(x) returns the correctly rounded (according to the prevailing ! rounding mode) square root of x. !! Algorithm:!	0. Disable inexact trap and set rounding mode to RN!	1. Shift and add to get 7.8 bits approximation of 1/sqrt(x).!	2. Apply reciproot iteration once in single precision, and then !	   twice in double precision (with some adjustment, see the double!	   "_sqrt") and multiply the reciproot by x to get a near perfect !	   approximation of sqrt(x) in double precision.!	3. Restore rounding mode and inexact enable flag!	4. Round the double precision answer to single precision.!! Note. The algorithm has been tested on a SUN-3 for all single precision!	arguments between 1.0 and 4.0.!	.seg	"data"	.align	8constant:const1	= 0x0	.word   0x3ff7ffff,0xffc00000   ! double 1.5-2**-30half	= 0x8	.word	0x3fe00000,0		! double 0.5fone	= 0x10	.word	0x3f800000	! 1.0two54	= 0x14	.word	0x5a800000	! 2**54twom27	= 0x18	.word	0x32000000	! 2**-27fhalf	= 0x1c	.word	0x3f000000	! 0.5fohalf	= 0x20	.word	0x3fc00000	! 1.5 table	= 0x24	.word	0x1500<<3,0x2ef8<<3,0x4d67<<3,0x6b02<<3	.word	0x87be<<3,0xa395<<3,0xbe7a<<3,0xd866<<3	.word	0xf14a<<3,0x1091b<<3,0x11fcd<<3,0x13552<<3	.word	0x14999<<3,0x15c98<<3,0x16e34<<3	.word	0x17e5f<<3,0x18d03<<3,0x19a01<<3,0x1a545<<3	.word	0x1ae8a<<3,0x1b5c4<<3,0x1bb01<<3	.word	0x1bfde<<3,0x1c28d<<3,0x1c2de<<3,0x1c0db<<3	.word	0x1ba7e<<3,0x1b11c<<3,0x1a4b5<<3	.word	0x1953d<<3,0x18266<<3,0x16be0<<3,0x1683e<<3	.word	0x179d8<<3,0x18a4d<<3,0x19992<<3	.word	0x1a789<<3,0x1b445<<3,0x1bf61<<3,0x1c989<<3	.word	0x1d16d<<3,0x1d77b<<3,0x1dddf<<3	.word	0x1e2ad<<3,0x1e5bf<<3,0x1e6e8<<3,0x1e654<<3	.word	0x1e3cd<<3,0x1df2a<<3,0x1d635<<3	.word	0x1cb16<<3,0x1be2c<<3,0x1ae4e<<3,0x19bde<<3	.word	0x1868e<<3,0x16e2e<<3,0x1527f<<3	.word	0x1334a<<3,0x11051<<3,0xe951<<3,0xbe01<<3	.word	0x8e0d<<3,0x5924<<3,0x1edd<<3! local variable using fpx	= -0x8tmp	= -0x10ofsr	= -0x18nfsr	= -0x1c! register usage! i0 = x! i1 = |x|	.seg	"text"	ENTRY(r_sqrt_)inrsqrt:	save	%sp,-128,%sp	set	constant,%l0	ld	[%i0],%f0	ld	[%i0],%i0	sethi	%hi(0x80000000),%o3	! o3 = 0x80000000	andn	%i0,%o3,%i1		! i1 = |x|	sethi	%hi(0x3f800800),%o4	! o4 = 1+2**-12	cmp	%i1,%o4	bl	1f	sethi	%hi(0x3f7ff000),%o4	! o4 = 1-2**-12	sethi	%hi(0x7f800000),%o4	cmp	%i1,%o4	bl	normal	tst	%i0	bl	NaN	nop    ! x is inf or NaN	ba	rsqrt_return	fadds	%f0,%f0,%f01:	cmp	%i1,%o4	bg	quickcrank	tst	%i0	tst	%i1	bne	1f	tst	%i0    ! x = +-0, return x	ba	rsqrt_return	nop1:	bl	NaN	sethi	%hi(0x01000000),%o4	cmp	%i1,%o4	bg	normal	tst	%i0    ! x is too tiny, may be subnormal	ld	[%l0+two54],%f1	fmuls	%f0,%f1,%f0		! scale up x	st	%f0,[%fp+x]	call	inrsqrt			! call rsqrt again	add	%fp,x,%o0	ld	[%l0+twom27],%f2	fmuls	%f0,%f2,%f0		! scale back sqrt(x)	ba	rsqrt_return	nopquickcrank:    ! now |x| is between 0x3f7ff0000 and 0x3f800800	bl	NaN	sethi	%hi(0x3f800000),%o3	cmp	%i0,%o3	bne,a	1f	ld	[%l0+twom27],%f2	! f2 = 2**-27	ba	rsqrt_return	nop1:	sra	%i0,1,%o1	sethi	%hi(0x00400000),%o2	sub	%o1,%o2,%o1	sethi	%hi(0x20000000),%o2	or	%o1,%o2,%o1		! o1 = 1+(x-1)/2	st	%o1,[%fp+x]	andcc	%i0,1,%g0	bne	odd	ld	[%fp+x],%f0	fnegs	%f2,%f2			! even odd:	ba	rsqrt_return	fadds	%f0,%f2,%f0normal:	bl	NaN    ! save RD, NX flags; reset RD to RN; disable NX;  save f4-f11	st	%fsr,[%fp+ofsr]		! save fsr 	ld	[%fp+ofsr],%o4		! o4 = fsr	sethi	%hi(0xc8000000),%o3	! set mask to reset NX and RD	andn	%o4,%o3,%o4		! reset o4 (fsr's copy)	st	%o4,[%fp+nfsr]	ld	[%fp+nfsr],%fsr		! disable NX and set RD to RN    ! initial approximation for 1/sqrt(x)	srl	%i0,1,%o0		! high x >> 1	ld	[%l0+fhalf],%f2		! f2 = 0.5	fmuls	%f2,%f0,%f1		! f1 = 0.5*x, f0=x 	sethi	%hi(0x5f400000),%o1	! offset constant	sub	%o1,%o0,%o0		! o0 = k = 0x5f400000 - (hx >> 1)	add	%l0,table,%o2		! o2 = address of data table	srl	%o0,15,%o1		! o1 := 4*(k >> 17)	and	%o1,0xfc,%o1		! o1 := 4*[63&(o0>>17)]	add	%o2,%o1,%o2		! o2 = address of the constant	ld	[%o2],%o3		! o3 = adjustment for the approximation	sub	%o0,%o3,%o0		! y approximates 1/sqrt(x) to 7.8 bits	st	%o0,[%fp+tmp]		! tmp = y	ld	[%fp+tmp],%f2		! f2 = initial approx. of 1/sqrt(x)    ! reciproot iteration once	fmuls	%f1,%f2,%f3		! f3 = (0.5*x)*y	fstod	%f1,%f6			! set f6 = double x*0.5	fmuls	%f2,%f3,%f3		! f3 = ((0.5*x)*y)*y	ld	[%l0+fohalf],%f4	! f4 = 1.5	fsubs	%f4,%f3,%f4		! 1.5-0.5*x*y*y	fmuls	%f2,%f4,%f2		! new y = old y*(1.5-0.5*x*y*y)    ! reciproot iteration twice in double	fstod	%f2,%f8			! f8 = double y	fmuld	%f8,%f8,%f4		! f4 = y*y	fmuld	%f6,%f4,%f4		! f4 = 0.5*x*y*y	ldd	[%l0+const1],%f10	fsubd	%f10,%f4,%f4		! f4 = (1.5-2**-30)-0.5*x*y*y	fmuld	%f8,%f4,%f8		! f8 = new y     ! reciproot iteration thrice in double	fstod	%f0,%f10		! f10= (double x)	fmuld	%f10,%f8,%f4		! f4 = z = x*y	std	%f8,[%fp+tmp]	ld	[%fp+tmp],%o4	sethi	%hi(0x00100000),%o3	sub	%o4,%o3,%o4	st	%o4,[%fp+tmp]	ldd	[%fp+tmp],%f12		! f12= 0.5*y	fmuld	%f12,%f4,%f2		! f2 = 0.5*z*y	ldd	[%l0+half],%f6		! f4 = 0.5	fsubd	%f6,%f2,%f2		! f2 = 0.5 - 0.5*z*y	fmuld	%f4,%f2,%f2	faddd	%f4,%f2,%f2		! f2 = z + z*(0.5-0.5*z*y)    ! restore fsr	ld	[%fp+ofsr],%fsr    ! convert the double precision result to single with adjustment	fdtos	%f2,%f0rsqrt_return:	ret	restoreNaN:	fsubs	%f0,%f0,%f0		! f0 is either 0 or NaN 	ba	rsqrt_return	fdivs	%f0,%f0,%f0		! f0 is NaN with signal unless x is NaN
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -