ieee754-df.s

From the Russian developer Mamaich's Pocket gcc compiler (runs on the Pocket PC) · .S source · 1,226 lines total · page 1 of 2

/* ieee754-df.S double-precision floating point support for ARM

   Copyright (C) 2003, 2004  Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   In addition to the permissions in the GNU General Public License, the
   Free Software Foundation gives you unlimited permission to link the
   compiled version of this file into combinations with other programs,
   and to distribute those combinations without any restriction coming
   from the use of this file.  (The General Public License restrictions
   do apply in other respects; for example, they cover modification of
   the file, and distribution when not linked into a combine
   executable.)

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 * For slightly simpler code please see the single precision version
 * of this file.
 *
 * Only the default rounding mode is intended for best performance.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performance.
 */

@ For FPA, float words are always big-endian.
@ For VFP, float words follow the memory system mode.
#if 1 /* defined(__VFP_FP__) && !defined(__ARMEB__) */
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#else
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#endif


#ifdef L_negdf2

ARM_FUNC_START negdf2
	@ flip sign bit
	eor	xh, xh, #0x80000000
	RET

	FUNC_END negdf2

#endif

#ifdef L_addsubdf3

ARM_FUNC_START subdf3
	@ flip sign bit of second arg
	eor	yh, yh, #0x80000000
#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
	b	1f			@ Skip Thumb-code prologue
#endif

ARM_FUNC_START adddf3

1:	@ Compare both args, return zero if equal but the sign.
	teq	xl, yl
	eoreq	ip, xh, yh
	teqeq	ip, #0x80000000
	beq	LSYM(Lad_z)

	@ If first arg is 0 or -0, return second arg.
	@ If second arg is 0 or -0, return first arg.
	orrs	ip, xl, xh, lsl #1
	moveq	xl, yl
	moveq	xh, yh
	orrnes	ip, yl, yh, lsl #1
	RETc(eq)

	stmfd	sp!, {r4, r5, lr}

	@ Mask out exponents.
	mov	ip, #0x7f000000
	orr	ip, ip, #0x00f00000
	and	r4, xh, ip
	and	r5, yh, ip

	@ If either of them is 0x7ff, result will be INF or NAN
	teq	r4, ip
	teqne	r5, ip
	beq	LSYM(Lad_i)

	@ Compute exponent difference.  Make largest exponent in r4,
	@ corresponding arg in xh-xl, and positive exponent difference in r5.
	subs	r5, r5, r4
	rsblt	r5, r5, #0
	ble	1f
	add	r4, r4, r5
	eor	yl, xl, yl
	eor	yh, xh, yh
	eor	xl, yl, xl
	eor	xh, yh, xh
	eor	yl, xl, yl
	eor	yh, xh, yh
1:
	@ If exponent difference is too large, return largest argument
	@ already in xh-xl.  We need up to 54 bit to handle proper rounding
	@ of 0x1p54 - 1.1.
	cmp	r5, #(54 << 20)
	RETLDM	"r4, r5" hi

	@ Convert mantissa to signed integer.
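	@ Layout reminder: a double held in xh-xl is
	@   xh = <sign:1> <exponent:11> <mantissa high:20>
	@   xl = <mantissa low:32>
	@ The code below clears the exponent field, restores the implicit
	@ leading 1 at bit 20 of xh (bit 52 of the full mantissa), and
	@ negates the 64-bit value with rsbs/rsc when the sign bit is set.
	@ For example 1.0 (xh-xl = 0x3ff00000-0x00000000) becomes the
	@ positive mantissa 0x00100000-0x00000000, i.e. 2^52.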
	tst	xh, #0x80000000
	bic	xh, xh, ip, lsl #1
	orr	xh, xh, #0x00100000
	beq	1f
	rsbs	xl, xl, #0
	rsc	xh, xh, #0
1:	tst	yh, #0x80000000
	bic	yh, yh, ip, lsl #1
	orr	yh, yh, #0x00100000
	beq	1f
	rsbs	yl, yl, #0
	rsc	yh, yh, #0
1:
	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not a common case, rescale them off line.
	teq	r4, r5
	beq	LSYM(Lad_d)
LSYM(Lad_x):
	@ Scale down second arg with exponent difference.
	@ Apply shift one bit left to first arg and the rest to second arg
	@ to simplify things later, but only if exponent does not become 0.
	mov	ip, #0
	movs	r5, r5, lsr #20
	beq	3f
	teq	r4, #(1 << 20)
	beq	1f
	movs	xl, xl, lsl #1
	adc	xh, ip, xh, lsl #1
	sub	r4, r4, #(1 << 20)
	subs	r5, r5, #1
	beq	3f

	@ Shift yh-yl right per r5, keep the leftover bits in ip.
1:	rsbs	lr, r5, #32
	blt	2f
	mov	ip, yl, lsl lr
	mov	yl, yl, lsr r5
	orr	yl, yl, yh, lsl lr
	mov	yh, yh, asr r5
	b	3f
2:	sub	r5, r5, #32
	add	lr, lr, #32
	cmp	yl, #1
	adc	ip, ip, yh, lsl lr
	mov	yl, yh, asr r5
	mov	yh, yh, asr #32
3:
	@ the actual addition
	adds	xl, xl, yl
	adc	xh, xh, yh

	@ We now have a result in xh-xl-ip.
	@ Keep absolute value in xh-xl-ip, sign in r5.
	ands	r5, xh, #0x80000000
	bpl	LSYM(Lad_p)
	rsbs	ip, ip, #0
	rscs	xl, xl, #0
	rsc	xh, xh, #0

	@ Determine how to normalize the result.
LSYM(Lad_p):
	cmp	xh, #0x00100000
	bcc	LSYM(Lad_l)
	cmp	xh, #0x00200000
	bcc	LSYM(Lad_r0)
	cmp	xh, #0x00400000
	bcc	LSYM(Lad_r1)

	@ Result needs to be shifted right.
	movs	xh, xh, lsr #1
	movs	xl, xl, rrx
	movs	ip, ip, rrx
	orrcs	ip, ip, #1
	add	r4, r4, #(1 << 20)
LSYM(Lad_r1):
	movs	xh, xh, lsr #1
	movs	xl, xl, rrx
	movs	ip, ip, rrx
	orrcs	ip, ip, #1
	add	r4, r4, #(1 << 20)

	@ Our result is now properly aligned into xh-xl, remaining bits in ip.
	@ Round with MSB of ip. If halfway between two numbers, round towards
	@ LSB of xl = 0.
LSYM(Lad_r0):
	adds	xl, xl, ip, lsr #31
	adc	xh, xh, #0
	teq	ip, #0x80000000
	biceq	xl, xl, #1

	@ One extreme rounding case may add a new MSB.  Adjust exponent.
	@ That MSB will be cleared when exponent is merged below.
	tst	xh, #0x00200000
	addne	r4, r4, #(1 << 20)

	@ Make sure we did not bust our exponent.
	adds	ip, r4, #(1 << 20)
	bmi	LSYM(Lad_o)

	@ Pack final result together.
LSYM(Lad_e):
	bic	xh, xh, #0x00300000
	orr	xh, xh, r4
	orr	xh, xh, r5
	RETLDM	"r4, r5"

LSYM(Lad_l):
	@ Result must be shifted left and exponent adjusted.
	@ No rounding necessary since ip will always be 0.
#if __ARM_ARCH__ < 5

	teq	xh, #0
	movne	r3, #-11
	moveq	r3, #21
	moveq	xh, xl
	moveq	xl, #0
	mov	r2, xh
	movs	ip, xh, lsr #16
	moveq	r2, r2, lsl #16
	addeq	r3, r3, #16
	tst	r2, #0xff000000
	moveq	r2, r2, lsl #8
	addeq	r3, r3, #8
	tst	r2, #0xf0000000
	moveq	r2, r2, lsl #4
	addeq	r3, r3, #4
	tst	r2, #0xc0000000
	moveq	r2, r2, lsl #2
	addeq	r3, r3, #2
	tst	r2, #0x80000000
	addeq	r3, r3, #1

#else

	teq	xh, #0
	moveq	xh, xl
	moveq	xl, #0
	clz	r3, xh
	addeq	r3, r3, #32
	sub	r3, r3, #11

#endif

	@ determine how to shift the value.
	subs	r2, r3, #32
	bge	2f
	adds	r2, r2, #12
	ble	1f

	@ shift value left 21 to 31 bits, or actually right 11 to 1 bits
	@ since a register switch happened above.
	add	ip, r2, #20
	rsb	r2, r2, #12
	mov	xl, xh, lsl ip
	mov	xh, xh, lsr r2
	b	3f

	@ actually shift value left 1 to 20 bits, which might also represent
	@ 32 to 52 bits if counting the register switch that happened earlier.
1:	add	r2, r2, #20
2:	rsble	ip, r2, #32
	mov	xh, xh, lsl r2
	orrle	xh, xh, xl, lsr ip
	movle	xl, xl, lsl r2

	@ adjust exponent accordingly.
3:	subs	r4, r4, r3, lsl #20
	bgt	LSYM(Lad_e)

	@ Exponent too small, denormalize result.
	@ Find out proper shift value.
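	@ At this point r4 holds the result exponent e <= 0 scaled by
	@ (1 << 20), and the mantissa must move right by (1 - e) bits to
	@ become a denormal.  The mvn below extracts ~e = -e - 1, from
	@ which the three cases (right shift by 1 to 20, 21 to 31, or
	@ 32 to 64 bits) are derived; e.g. e = 0 gives ~e = -1, and
	@ -1 - 30 + 12 + 20 = 1, the expected 1-bit shift.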
	mvn	r4, r4, asr #20
	subs	r4, r4, #30
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right by 1 to 20 bits, sign is in r5.
	add	r4, r4, #20
	rsb	r2, r4, #32
	mov	xl, xl, lsr r4
	orr	xl, xl, xh, lsl r2
	orr	xh, r5, xh, lsr r4
	RETLDM	"r4, r5"

	@ shift result right by 21 to 31 bits, or left 11 to 1 bits after
	@ a register switch from xh to xl.
1:	rsb	r4, r4, #12
	rsb	r2, r4, #32
	mov	xl, xl, lsr r2
	orr	xl, xl, xh, lsl r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Shift value right by 32 to 64 bits, or 0 to 32 bits after a switch
	@ from xh to xl.
2:	mov	xl, xh, lsr r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Adjust exponents for denormalized arguments.
LSYM(Lad_d):
	teq	r4, #0
	eoreq	xh, xh, #0x00100000
	addeq	r4, r4, #(1 << 20)
	eor	yh, yh, #0x00100000
	subne	r5, r5, #(1 << 20)
	b	LSYM(Lad_x)

	@ Result is x - x = 0, unless x = INF or NAN.
LSYM(Lad_z):
	sub	ip, ip, #0x00100000	@ ip becomes 0x7ff00000
	and	r2, xh, ip
	teq	r2, ip
	orreq	xh, ip, #0x00080000
	movne	xh, #0
	mov	xl, #0
	RET

	@ Overflow: return INF.
LSYM(Lad_o):
	orr	xh, r5, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5"

	@ At least one of x or y is INF/NAN.
	@   if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
	@   if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
	@   if either is NAN: return NAN
	@   if opposite sign: return NAN
	@   return xh-xl (which is INF or -INF)
LSYM(Lad_i):
	teq	r4, ip
	movne	xh, yh
	movne	xl, yl
	teqeq	r5, ip
	RETLDM	"r4, r5" ne
	orrs	r4, xl, xh, lsl #12
	orreqs	r4, yl, yh, lsl #12
	teqeq	xh, yh
	orrne	xh, r5, #0x00080000
	movne	xl, #0
	RETLDM	"r4, r5"

	FUNC_END subdf3
	FUNC_END adddf3

ARM_FUNC_START floatunsidf
	teq	r0, #0
	moveq	r1, #0
	RETc(eq)
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #(0x400 << 20)	@ initial exponent
	add	r4, r4, #((52-1) << 20)
	mov	r5, #0			@ sign bit is 0
	mov	xl, r0
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END floatunsidf

ARM_FUNC_START floatsidf
	teq	r0, #0
	moveq	r1, #0
	RETc(eq)
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #(0x400 << 20)	@ initial exponent
	add	r4, r4, #((52-1) << 20)
	ands	r5, r0, #0x80000000	@ sign bit in r5
	rsbmi	r0, r0, #0		@ absolute value
	mov	xl, r0
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END floatsidf

ARM_FUNC_START extendsfdf2
	movs	r2, r0, lsl #1
	beq	1f			@ value is 0.0 or -0.0
	mov	xh, r2, asr #3		@ stretch exponent
	mov	xh, xh, rrx		@ retrieve sign bit
	mov	xl, r2, lsl #28		@ retrieve remaining bits
	ands	r2, r2, #0xff000000	@ isolate exponent
	beq	2f			@ exponent was 0 but not mantissa
	teq	r2, #0xff000000		@ check if INF or NAN
	eorne	xh, xh, #0x38000000	@ fixup exponent otherwise.
	RET

1:	mov	xh, r0
	mov	xl, #0
	RET

2:	@ value was denormalized.  We can normalize it now.
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #(0x380 << 20)	@ setup corresponding exponent
	add	r4, r4, #(1 << 20)
	and	r5, xh, #0x80000000	@ move sign bit into r5
	bic	xh, xh, #0x80000000
	b	LSYM(Lad_l)

	FUNC_END extendsfdf2

#endif /* L_addsubdf3 */

#ifdef L_muldivdf3

ARM_FUNC_START muldf3

	stmfd	sp!, {r4, r5, r6, lr}

	@ Mask out exponents.
	mov	ip, #0x7f000000
	orr	ip, ip, #0x00f00000
	and	r4, xh, ip
	and	r5, yh, ip

	@ Trap any INF/NAN.
	teq	r4, ip
	teqne	r5, ip
	beq	LSYM(Lml_s)

	@ Trap any multiplication by 0.
	orrs	r6, xl, xh, lsl #1
	orrnes	r6, yl, yh, lsl #1
	beq	LSYM(Lml_z)

	@ Shift exponents right one bit to make room for overflow bit.
	@ If either of them is 0, scale denormalized arguments off line.
	@ Then add both exponents together.
	movs	r4, r4, lsr #1
	teqne	r5, #0
	beq	LSYM(Lml_d)
LSYM(Lml_x):
	add	r4, r4, r5, asr #1

	@ Preserve final sign in r4 along with exponent for now.
	teq	xh, yh
	orrmi	r4, r4, #0x8000

	@ Convert mantissa to unsigned integer.
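	@ Note on the sign trick above: teq xh, yh sets the N flag from
	@ xh EOR yh, i.e. N = 1 exactly when the operand signs differ, so
	@ orrmi records the product sign in bit 15 of r4 alongside the
	@ summed exponent.  The four instructions below then strip both
	@ exponent fields and restore the implicit leading 1, leaving two
	@ unsigned 53-bit mantissas in the range [2^52, 2^53).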
	bic	xh, xh, ip, lsl #1
	bic	yh, yh, ip, lsl #1
	orr	xh, xh, #0x00100000
	orr	yh, yh, #0x00100000

#if __ARM_ARCH__ < 4

	@ Well, no way to make it shorter without the umull instruction.
	@ We must perform that 53 x 53 bit multiplication by hand.
	stmfd	sp!, {r7, r8, r9, sl, fp}
	mov	r7, xl, lsr #16
	mov	r8, yl, lsr #16
	mov	r9, xh, lsr #16
	mov	sl, yh, lsr #16
	bic	xl, xl, r7, lsl #16
	bic	yl, yl, r8, lsl #16
	bic	xh, xh, r9, lsl #16
	bic	yh, yh, sl, lsl #16
	mul	ip, xl, yl
	mul	fp, xl, r8
	mov	lr, #0
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, r7, yl
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, xl, sl
	mov	r5, #0
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r7, yh
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, r8
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r9, yl
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, sl
	mul	r6, r9, sl
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, r9, yh
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, xl, yh
	adds	lr, lr, fp
	mul	fp, r7, sl
	adcs	r5, r5, fp
	mul	fp, xh, yl
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, r9, r8
	adcs	r5, r5, fp
	mul	fp, r7, r8
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, xh, yh
	adcs	r5, r5, fp
	adc	r6, r6, #0
	ldmfd	sp!, {r7, r8, r9, sl, fp}

#else

	@ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits.
	umull	ip, lr, xl, yl
	mov	r5, #0
	umlal	lr, r5, xl, yh
	umlal	lr, r5, xh, yl
	mov	r6, #0
	umlal	r5, r6, xh, yh

#endif

	@ The LSBs in ip are only significant for the final rounding.
	@ Fold them into one bit of lr.
	teq	ip, #0
	orrne	lr, lr, #1

	@ Put final sign in xh.
	mov	xh, r4, lsl #16
	bic	r4, r4, #0x8000

	@ Adjust result if one extra MSB appeared (one of four times).
	tst	r6, #(1 << 9)
	beq	1f
	add	r4, r4, #(1 << 19)
	movs	r6, r6, lsr #1
	movs	r5, r5, rrx
	movs	lr, lr, rrx
	orrcs	lr, lr, #1
1:
	@ Scale back to 53 bits.
	@ xh contains sign bit already.
	orr	xh, xh, r6, lsl #12
	orr	xh, xh, r5, lsr #20
	mov	xl, r5, lsl #12
	orr	xl, xl, lr, lsr #20

	@ Apply exponent bias, check range for underflow.
	sub	r4, r4, #0x00f80000
	subs	r4, r4, #0x1f000000
	ble	LSYM(Lml_u)

	@ Round the result.
	movs	lr, lr, lsl #12
	bpl	1f
	adds	xl, xl, #1
	adc	xh, xh, #0
	teq	lr, #0x80000000
	biceq	xl, xl, #1

	@ Rounding may have produced an extra MSB here.
	@ The extra bit is cleared before merging the exponent below.
	tst	xh, #0x00200000
	addne	r4, r4, #(1 << 19)

1:	@ Check exponent for overflow.
	adds	ip, r4, #(1 << 19)
	tst	ip, #(1 << 30)
	bne	LSYM(Lml_o)

	@ Add final exponent.
	bic	xh, xh, #0x00300000
	orr	xh, xh, r4, lsl #1
	RETLDM	"r4, r5, r6"

	@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
	eor	xh, xh, yh
LSYM(Ldv_z):
	bic	xh, xh, #0x7fffffff
	mov	xl, #0
	RETLDM	"r4, r5, r6"

	@ Check if denormalized result is possible, otherwise return signed 0.
LSYM(Lml_u):
	cmn	r4, #(53 << 19)
	movle	xl, #0
	bicle	xh, xh, #0x7fffffff
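	@ (About the cmn test above: since both exponents were halved
	@ earlier, r4 holds the biased result exponent scaled by (1 << 19).
	@ At or below -53 in those units, even the top bit of the 106-bit
	@ product lies past the last denormal bit position, so only a
	@ signed zero can be returned.  The denormalization path and the
	@ remaining functions continue on page 2 of the listing.)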
