
📄 ieee754-sf.S

📁 Compiler software for programming under Linux
💻 S (assembly source)
📖 Page 1 of 2
/* ieee754-sf.S single-precision floating point support for ARM

   Copyright (C) 2003, 2004, 2005  Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   In addition to the permissions in the GNU General Public License, the
   Free Software Foundation gives you unlimited permission to link the
   compiled version of this file into combinations with other programs,
   and to distribute those combinations without any restriction coming
   from the use of this file.  (The General Public License restrictions
   do apply in other respects; for example, they cover modification of
   the file, and distribution when not linked into a combine
   executable.)

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 *
 * Only the default rounding mode is intended for best performances.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performances.
 */

#ifdef L_negsf2

ARM_FUNC_START negsf2
ARM_FUNC_ALIAS aeabi_fneg negsf2

	eor	r0, r0, #0x80000000	@ flip sign bit
	RET

	FUNC_END aeabi_fneg
	FUNC_END negsf2

#endif

#ifdef L_addsubsf3

ARM_FUNC_START aeabi_frsub

	eor	r0, r0, #0x80000000	@ flip sign bit of first arg
	b	1f

ARM_FUNC_START subsf3
ARM_FUNC_ALIAS aeabi_fsub subsf3

	eor	r1, r1, #0x80000000	@ flip sign bit of second arg
#if defined(__INTERWORKING_STUBS__)
	b	1f			@ Skip Thumb-code prologue
#endif

ARM_FUNC_START addsf3
ARM_FUNC_ALIAS aeabi_fadd addsf3

1:	@ Look for zeroes, equal values, INF, or NAN.
	movs	r2, r0, lsl #1
	movnes	r3, r1, lsl #1
	teqne	r2, r3
	mvnnes	ip, r2, asr #24
	mvnnes	ip, r3, asr #24
	beq	LSYM(Lad_s)

	@ Compute exponent difference.  Make largest exponent in r2,
	@ corresponding arg in r0, and positive exponent difference in r3.
	mov	r2, r2, lsr #24
	rsbs	r3, r2, r3, lsr #24
	addgt	r2, r2, r3
	eorgt	r1, r0, r1
	eorgt	r0, r1, r0
	eorgt	r1, r0, r1
	rsblt	r3, r3, #0

	@ If exponent difference is too large, return largest argument
	@ already in r0.  We need up to 25 bit to handle proper rounding
	@ of 0x1p25 - 1.1.
	cmp	r3, #25
	RETc(hi)

	@ Convert mantissa to signed integer.
	tst	r0, #0x80000000
	orr	r0, r0, #0x00800000
	bic	r0, r0, #0xff000000
	rsbne	r0, r0, #0
	tst	r1, #0x80000000
	orr	r1, r1, #0x00800000
	bic	r1, r1, #0xff000000
	rsbne	r1, r1, #0

	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not common case, rescale them off line.
	teq	r2, r3
	beq	LSYM(Lad_d)
LSYM(Lad_x):

	@ Compensate for the exponent overlapping the mantissa MSB added later
	sub	r2, r2, #1

	@ Shift and add second arg to first arg in r0.
	@ Keep leftover bits into r1.
	adds	r0, r0, r1, asr r3
	rsb	r3, r3, #32
	mov	r1, r1, lsl r3

	@ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
	and	r3, r0, #0x80000000
	bpl	LSYM(Lad_p)
	rsbs	r1, r1, #0
	rsc	r0, r0, #0

	@ Determine how to normalize the result.
LSYM(Lad_p):
	cmp	r0, #0x00800000
	bcc	LSYM(Lad_a)
	cmp	r0, #0x01000000
	bcc	LSYM(Lad_e)

	@ Result needs to be shifted right.
	movs	r0, r0, lsr #1
	mov	r1, r1, rrx
	add	r2, r2, #1

	@ Make sure we did not bust our exponent.
	cmp	r2, #254
	bhs	LSYM(Lad_o)

	@ Our result is now properly aligned into r0, remaining bits in r1.
	@ Pack final result together.
	@ Round with MSB of r1. If halfway between two numbers, round towards
	@ LSB of r0 = 0.
LSYM(Lad_e):
	cmp	r1, #0x80000000
	adc	r0, r0, r2, lsl #23
	biceq	r0, r0, #1
	orr	r0, r0, r3
	RET

	@ Result must be shifted left and exponent adjusted.
LSYM(Lad_a):
	movs	r1, r1, lsl #1
	adc	r0, r0, r0
	tst	r0, #0x00800000
	sub	r2, r2, #1
	bne	LSYM(Lad_e)

	@ No rounding necessary since r1 will always be 0 at this point.
LSYM(Lad_l):

#if __ARM_ARCH__ < 5

	movs	ip, r0, lsr #12
	moveq	r0, r0, lsl #12
	subeq	r2, r2, #12
	tst	r0, #0x00ff0000
	moveq	r0, r0, lsl #8
	subeq	r2, r2, #8
	tst	r0, #0x00f00000
	moveq	r0, r0, lsl #4
	subeq	r2, r2, #4
	tst	r0, #0x00c00000
	moveq	r0, r0, lsl #2
	subeq	r2, r2, #2
	cmp	r0, #0x00800000
	movcc	r0, r0, lsl #1
	sbcs	r2, r2, #0

#else

	clz	ip, r0
	sub	ip, ip, #8
	subs	r2, r2, ip
	mov	r0, r0, lsl ip

#endif

	@ Final result with sign
	@ If exponent negative, denormalize result.
	addge	r0, r0, r2, lsl #23
	rsblt	r2, r2, #0
	orrge	r0, r0, r3
	orrlt	r0, r3, r0, lsr r2
	RET

	@ Fixup and adjust bit position for denormalized arguments.
	@ Note that r2 must not remain equal to 0.
LSYM(Lad_d):
	teq	r2, #0
	eor	r1, r1, #0x00800000
	eoreq	r0, r0, #0x00800000
	addeq	r2, r2, #1
	subne	r3, r3, #1
	b	LSYM(Lad_x)

LSYM(Lad_s):
	mov	r3, r1, lsl #1

	mvns	ip, r2, asr #24
	mvnnes	ip, r3, asr #24
	beq	LSYM(Lad_i)

	teq	r2, r3
	beq	1f

	@ Result is x + 0.0 = x or 0.0 + y = y.
	teq	r2, #0
	moveq	r0, r1
	RET

1:	teq	r0, r1

	@ Result is x - x = 0.
	movne	r0, #0
	RETc(ne)

	@ Result is x + x = 2x.
	tst	r2, #0xff000000
	bne	2f
	movs	r0, r0, lsl #1
	orrcs	r0, r0, #0x80000000
	RET
2:	adds	r2, r2, #(2 << 24)
	addcc	r0, r0, #(1 << 23)
	RETc(cc)
	and	r3, r0, #0x80000000

	@ Overflow: return INF.
LSYM(Lad_o):
	orr	r0, r3, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	@ At least one of r0/r1 is INF/NAN.
	@   if r0 != INF/NAN: return r1 (which is INF/NAN)
	@   if r1 != INF/NAN: return r0 (which is INF/NAN)
	@   if r0 or r1 is NAN: return NAN
	@   if opposite sign: return NAN
	@   otherwise return r0 (which is INF or -INF)
LSYM(Lad_i):
	mvns	r2, r2, asr #24
	movne	r0, r1
	mvneqs	r3, r3, asr #24
	movne	r1, r0
	movs	r2, r0, lsl #9
	moveqs	r3, r1, lsl #9
	teqeq	r0, r1
	orrne	r0, r0, #0x00400000	@ quiet NAN
	RET

	FUNC_END aeabi_frsub
	FUNC_END aeabi_fadd
	FUNC_END addsf3
	FUNC_END aeabi_fsub
	FUNC_END subsf3

ARM_FUNC_START floatunsisf
ARM_FUNC_ALIAS aeabi_ui2f floatunsisf

	mov	r3, #0
	b	1f

ARM_FUNC_START floatsisf
ARM_FUNC_ALIAS aeabi_i2f floatsisf

	ands	r3, r0, #0x80000000
	rsbmi	r0, r0, #0

1:	movs	ip, r0
	RETc(eq)

	@ Add initial exponent to sign
	orr	r3, r3, #((127 + 23) << 23)

	.ifnc	ah, r0
	mov	ah, r0
	.endif
	mov	al, #0
	b	2f

	FUNC_END aeabi_i2f
	FUNC_END floatsisf
	FUNC_END aeabi_ui2f
	FUNC_END floatunsisf

ARM_FUNC_START floatundisf
ARM_FUNC_ALIAS aeabi_ul2f floatundisf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqs	f0, #0.0
#endif
	RETc(eq)

	mov	r3, #0
	b	1f

ARM_FUNC_START floatdisf
ARM_FUNC_ALIAS aeabi_l2f floatdisf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqs	f0, #0.0
#endif
	RETc(eq)

	ands	r3, ah, #0x80000000	@ sign bit in r3
	bpl	1f
	rsbs	al, al, #0
	rsc	ah, ah, #0
1:
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0 for backwards
	@ compatibility.
	str	lr, [sp, #-8]!
	adr	lr, LSYM(f0_ret)
#endif

	movs	ip, ah
	moveq	ip, al
	moveq	ah, al
	moveq	al, #0

	@ Add initial exponent to sign
	orr	r3, r3, #((127 + 23 + 32) << 23)
	subeq	r3, r3, #(32 << 23)

2:	sub	r3, r3, #(1 << 23)

#if __ARM_ARCH__ < 5

	mov	r2, #23
	cmp	ip, #(1 << 16)
	movhs	ip, ip, lsr #16
	subhs	r2, r2, #16
	cmp	ip, #(1 << 8)
	movhs	ip, ip, lsr #8
	subhs	r2, r2, #8
	cmp	ip, #(1 << 4)
	movhs	ip, ip, lsr #4
	subhs	r2, r2, #4
	cmp	ip, #(1 << 2)
	subhs	r2, r2, #2
	sublo	r2, r2, ip, lsr #1
	subs	r2, r2, ip, lsr #3

#else

	clz	r2, ip
	subs	r2, r2, #8

#endif

	sub	r3, r3, r2, lsl #23
	blt	3f

	add	r3, r3, ah, lsl r2
	mov	ip, al, lsl r2
	rsb	r2, r2, #32
	cmp	ip, #0x80000000
	adc	r0, r3, al, lsr r2
	biceq	r0, r0, #1
	RET

3:	add	r2, r2, #32
	mov	ip, ah, lsl r2
	rsb	r2, r2, #32
	orrs	al, al, ip, lsl #1
	adc	r0, r3, ah, lsr r2
	biceq	r0, r0, ip, lsr #31
	RET

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

LSYM(f0_ret):
	str	r0, [sp, #-4]!
	ldfs	f0, [sp], #4
	RETLDM

#endif

	FUNC_END floatdisf
	FUNC_END aeabi_l2f
	FUNC_END floatundisf
	FUNC_END aeabi_ul2f

#endif /* L_addsubsf3 */

#ifdef L_muldivsf3

ARM_FUNC_START mulsf3
ARM_FUNC_ALIAS aeabi_fmul mulsf3

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	ands	r2, ip, r0, lsr #23
	andnes	r3, ip, r1, lsr #23
	teqne	r2, ip
	teqne	r3, ip
	beq	LSYM(Lml_s)
LSYM(Lml_x):

	@ Add exponents together
	add	r2, r2, r3

	@ Determine final sign.
	eor	ip, r0, r1

	@ Convert mantissa to unsigned integer.
	@ If power of two, branch to a separate path.
	@ Make up for final alignment.
	movs	r0, r0, lsl #9
	movnes	r1, r1, lsl #9
	beq	LSYM(Lml_1)
	mov	r3, #0x08000000
	orr	r0, r3, r0, lsr #5
	orr	r1, r3, r1, lsr #5

#if __ARM_ARCH__ < 4

	@ Put sign bit in r3, which will be restored into r0 later.
	and	r3, ip, #0x80000000

	@ Well, no way to make it shorter without the umull instruction.
	stmfd	sp!, {r3, r4, r5}
	mov	r4, r0, lsr #16
	mov	r5, r1, lsr #16
	bic	r0, r0, r4, lsl #16
	bic	r1, r1, r5, lsl #16
	mul	ip, r4, r5
	mul	r3, r0, r1
	mul	r0, r5, r0
	mla	r0, r4, r1, r0
	adds	r3, r3, r0, lsl #16
	adc	r1, ip, r0, lsr #16
	ldmfd	sp!, {r0, r4, r5}

#else

	@ The actual multiplication.
	umull	r3, r1, r0, r1

	@ Put final sign in r0.
	and	r0, ip, #0x80000000

#endif

	@ Adjust result upon the MSB position.
	cmp	r1, #(1 << 23)
	movcc	r1, r1, lsl #1
	orrcc	r1, r1, r3, lsr #31
	movcc	r3, r3, lsl #1

	@ Add sign to result.
	orr	r0, r0, r1

	@ Apply exponent bias, check for under/overflow.
	sbc	r2, r2, #127
	cmp	r2, #(254 - 1)
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	cmp	r3, #0x80000000
	adc	r0, r0, r2, lsl #23
	biceq	r0, r0, #1
	RET

	@ Multiplication by 0x1p*: let's shortcut a lot of code.
LSYM(Lml_1):
	teq	r0, #0
	and	ip, ip, #0x80000000
	moveq	r1, r1, lsl #9
	orr	r0, ip, r0, lsr #9
	orr	r0, r0, r1, lsr #9
	subs	r2, r2, #127
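
The pack-and-round step at LSYM(Lad_e) above (mulsf3 finishes the same way) is dense enough to misread: cmp sets the carry flag when the leftover bits in r1 are at least half a unit in the last place, adc folds that carry into the mantissa together with the exponent field, and biceq clears the result's LSB on an exact tie, which gives round-to-nearest-even. Below is a minimal C model of that step, a sketch for illustration only; the function and parameter names are mine, not part of libgcc.

#include <stdint.h>

/* Hypothetical C model of the LSYM(Lad_e) packing step.
   mant:  normalized mantissa with the implicit bit still set (bit 23)
   expm1: biased exponent minus one (the earlier "sub r2, r2, #1"
          makes up for the implicit bit spilling into the exponent)
   rest:  bits shifted out of the mantissa (r1)
   sign:  sign bit in bit 31 (r3) */
static uint32_t pack_round(uint32_t mant, uint32_t expm1,
                           uint32_t rest, uint32_t sign)
{
    /* cmp r1, #0x80000000 / adc r0, r0, r2, lsl #23:
       round up when the leftover is at least half an ULP */
    uint32_t r = mant + (expm1 << 23) + (rest >= 0x80000000u);
    if (rest == 0x80000000u)    /* biceq r0, r0, #1: tie, round to even */
        r &= ~1u;
    return r | sign;            /* orr r0, r0, r3: restore the sign */
}

Note how a mantissa that rounds up past 0xffffff carries straight into the exponent field; the assembly relies on exactly that behavior.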

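The #if __ARM_ARCH__ < 4 path in mulsf3 also deserves a second look: without umull it builds the 64-bit product of the two mantissas from four 16x16 partial products (the mul/mla sequence) and stitches the halves together with adds/adc. Here is a generic, self-contained C sketch of that decomposition; the names are hypothetical and, unlike the assembly, it keeps every carry because it assumes nothing about its operands.

#include <stdint.h>

/* Hypothetical 32x32 -> 64 bit multiply from 16-bit halves, mirroring
   the pre-ARMv4 mul/mla sequence above. */
static void mul32x32_64(uint32_t a, uint32_t b,
                        uint32_t *hi, uint32_t *lo)
{
    uint32_t al = a & 0xffffu, ah = a >> 16;
    uint32_t bl = b & 0xffffu, bh = b >> 16;

    uint32_t t0 = al * bl;       /* bits  0..31, "mul r3, r0, r1" */
    uint32_t t1 = ah * bl;       /* bits 16..47, the mla's r4*r1 term */
    uint32_t t2 = al * bh;       /* bits 16..47, "mul r0, r5, r0" */
    uint32_t t3 = ah * bh;       /* bits 32..63, "mul ip, r4, r5" */

    uint32_t mid  = t1 + t2;               /* "mla r0, r4, r1, r0" */
    uint32_t lo32 = t0 + (mid << 16);      /* "adds r3, r3, r0, lsl #16" */

    *lo = lo32;
    *hi = t3 + (mid >> 16)                 /* "adc r1, ip, r0, lsr #16" */
        + ((mid < t1) ? 0x10000u : 0)      /* wrap of the middle sum */
        + (lo32 < t0);                     /* carry out of the low word */
}

The assembly can skip the middle-sum wrap check: by the time it multiplies, both operands have the form 0x08000000 | (fraction << 4), so, as far as I can tell, the two middle partial products can never overflow 32 bits when added.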