📄 gcc-3.3.2-arm-softfloat.patch
+	mov	r3, r3, rrx	@ preserve C flag (the actual sign)
+
+	@ check exponent range.
+	mov	ip, #0x7f000000
+	orr	ip, ip, #0x00f00000
+	and	r2, xh, ip
+	teq	r2, ip
+	beq	2f		@ value is INF or NAN
+	bic	ip, ip, #0x40000000
+	cmp	r2, ip
+	bcc	1f		@ value is too small
+	add	ip, ip, #(31 << 20)
+	cmp	r2, ip
+	bcs	3f		@ value is too large
+
+	rsb	r2, r2, ip
+	mov	ip, xh, lsl #11
+	orr	ip, ip, #0x80000000
+	orr	ip, ip, xl, lsr #21
+	mov	r2, r2, lsr #20
+	tst	r3, #0x80000000	@ the sign bit
+	mov	r0, ip, lsr r2
+	rsbne	r0, r0, #0
+	RET
+
+1:	mov	r0, #0
+	RET
+
+2:	orrs	xl, xl, xh, lsl #12
+	bne	4f		@ r0 is NAN.
+3:	ands	r0, r3, #0x80000000	@ the sign bit
+	moveq	r0, #0x7fffffff	@ maximum signed positive si
+	RET
+
+4:	mov	r0, #0		@ How should we convert NAN?
+	RET
+
+	FUNC_END fixdfsi
+
+#endif /* L_fixdfsi */
+
+#ifdef L_fixunsdfsi
+
+ARM_FUNC_START fixunsdfsi
+	orrs	ip, xl, xh, lsl #1
+	movcss	r0, #0		@ value is negative
+	RETc(eq)		@ or 0 (xl, xh overlap r0)
+
+	@ check exponent range.
+	mov	ip, #0x7f000000
+	orr	ip, ip, #0x00f00000
+	and	r2, xh, ip
+	teq	r2, ip
+	beq	2f		@ value is INF or NAN
+	bic	ip, ip, #0x40000000
+	cmp	r2, ip
+	bcc	1f		@ value is too small
+	add	ip, ip, #(31 << 20)
+	cmp	r2, ip
+	bhi	3f		@ value is too large
+
+	rsb	r2, r2, ip
+	mov	ip, xh, lsl #11
+	orr	ip, ip, #0x80000000
+	orr	ip, ip, xl, lsr #21
+	mov	r2, r2, lsr #20
+	mov	r0, ip, lsr r2
+	RET
+
+1:	mov	r0, #0
+	RET
+
+2:	orrs	xl, xl, xh, lsl #12
+	bne	4f		@ value is NAN.
+3:	mov	r0, #0xffffffff	@ maximum unsigned si
+	RET
+
+4:	mov	r0, #0		@ How should we convert NAN?
+	RET
+
+	FUNC_END fixunsdfsi
+
+#endif /* L_fixunsdfsi */
+
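The fixdfsi/fixunsdfsi code above is easier to follow next to a C rendition. The sketch below models only the unsigned path, assuming the double arrives as its raw high/low words (xh/xl, as in the asm); the function name is invented for illustration and this is not part of the patch:

#include <stdint.h>

/* Hypothetical C rendition of the fixunsdfsi path above: convert an
   IEEE-754 double, given as its raw high (xh) and low (xl) words, to a
   32-bit unsigned integer with the same special-case handling. */
uint32_t fixunsdfsi_sketch(uint32_t xh, uint32_t xl)
{
    uint32_t exp = (xh >> 20) & 0x7ff;        /* biased exponent */

    if (xh & 0x80000000u)                     /* negative: clamp to 0 */
        return 0;
    if (exp == 0x7ff)                         /* INF or NAN */
        return ((xh << 12) | xl) ? 0 : 0xffffffffu;
    if (exp < 1023)                           /* |value| < 1: truncate to 0 */
        return 0;
    if (exp > 1023 + 31)                      /* too large: clamp to max */
        return 0xffffffffu;

    /* Implicit leading 1 plus the top 31 stored mantissa bits, exactly as
       built by: mov ip, xh, lsl #11 / orr ip, ip, #0x80000000 /
       orr ip, ip, xl, lsr #21 */
    uint32_t mant = 0x80000000u | (xh << 11) | (xl >> 21);
    return mant >> ((1023 + 31) - exp);
}

The signed fixdfsi variant differs only in allowing one less bit of magnitude and negating the result when the sign bit (saved in r3 above) is set.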
#ifdef L_truncdfsf2
+
+ARM_FUNC_START truncdfsf2
+	orrs	r2, xl, xh, lsl #1
+	moveq	r0, r2, rrx
+	RETc(eq)		@ value is 0.0 or -0.0
+
+	@ check exponent range.
+	mov	ip, #0x7f000000
+	orr	ip, ip, #0x00f00000
+	and	r2, ip, xh
+	teq	r2, ip
+	beq	2f		@ value is INF or NAN
+	bic	xh, xh, ip
+	cmp	r2, #(0x380 << 20)
+	bls	4f		@ value is too small
+
+	@ shift and round mantissa
+1:	movs	r3, xl, lsr #29
+	adc	r3, r3, xh, lsl #3
+
+	@ if halfway between two numbers, round towards LSB = 0.
+	mov	xl, xl, lsl #3
+	teq	xl, #0x80000000
+	biceq	r3, r3, #1
+
+	@ rounding might have created an extra MSB. If so adjust exponent.
+	tst	r3, #0x00800000
+	addne	r2, r2, #(1 << 20)
+	bicne	r3, r3, #0x00800000
+
+	@ check exponent for overflow
+	mov	ip, #(0x400 << 20)
+	orr	ip, ip, #(0x07f << 20)
+	cmp	r2, ip
+	bcs	3f		@ overflow
+
+	@ adjust exponent, merge with sign bit and mantissa.
+	movs	xh, xh, lsl #1
+	mov	r2, r2, lsl #4
+	orr	r0, r3, r2, rrx
+	eor	r0, r0, #0x40000000
+	RET
+
+2:	@ check for NAN
+	orrs	xl, xl, xh, lsl #12
+	movne	r0, #0x7f000000
+	orrne	r0, r0, #0x00c00000
+	RETc(ne)		@ return NAN
+
+3:	@ return INF with sign
+	and	r0, xh, #0x80000000
+	orr	r0, r0, #0x7f000000
+	orr	r0, r0, #0x00800000
+	RET
+
+4:	@ check if denormalized value is possible
+	subs	r2, r2, #((0x380 - 24) << 20)
+	andle	r0, xh, #0x80000000	@ too small, return signed 0.
+	RETc(le)
+
+	@ denormalize value so we can resume with the code above afterwards.
+	orr	xh, xh, #0x00100000
+	mov	r2, r2, lsr #20
+	rsb	r2, r2, #25
+	cmp	r2, #20
+	bgt	6f
+
+	rsb	ip, r2, #32
+	mov	r3, xl, lsl ip
+	mov	xl, xl, lsr r2
+	orr	xl, xl, xh, lsl ip
+	movs	xh, xh, lsl #1
+	mov	xh, xh, lsr r2
+	mov	xh, xh, rrx
+5:	teq	r3, #0		@ fold r3 bits into the LSB
+	orrne	xl, xl, #1	@ for rounding considerations.
+	mov	r2, #(0x380 << 20)	@ equivalent to the 0 float exponent
+	b	1b
+
+6:	rsb	r2, r2, #(12 + 20)
+	rsb	ip, r2, #32
+	mov	r3, xl, lsl r2
+	mov	xl, xl, lsr ip
+	orr	xl, xl, xh, lsl r2
+	and	xh, xh, #0x80000000
+	b	5b
+
+	FUNC_END truncdfsf2
+
+#endif /* L_truncdfsf2 */
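The interesting part of truncdfsf2 is the round-to-nearest-even step at label 1. A C sketch of just that step follows; the helper name is invented, xh is assumed to have its exponent bits already masked off (the bic xh, xh, ip above), and only the normal path is modeled:

#include <stdint.h>

/* Sketch of the shift-and-round step above: collapse the double's
   mantissa to the float's 23 stored bits, rounding to nearest with
   ties broken towards an even (LSB = 0) result. */
uint32_t round_mantissa_sketch(uint32_t xh, uint32_t xl)
{
    /* movs r3, xl, lsr #29 / adc r3, r3, xh, lsl #3:
       the 23 target bits plus the guard bit (bit 28 of xl) carried in. */
    uint32_t guard = (xl >> 28) & 1;
    uint32_t mant  = (xh << 3) + (xl >> 29) + guard;

    /* Exactly halfway (guard set, nothing below it): clear the LSB,
       matching teq xl, #0x80000000 / biceq r3, r3, #1. */
    if ((xl << 3) == 0x80000000u)
        mant &= ~1u;

    return mant;  /* caller bumps the exponent if rounding carried into bit 23 */
}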
diff -urN gcc-3.3/gcc/config/arm/ieee754-sf.S gcc-3.3-vfp/gcc/config/arm/ieee754-sf.S
--- gcc-3.3/gcc/config/arm/ieee754-sf.S	Wed Dec 31 19:00:00 1969
+++ gcc-3.3-vfp/gcc/config/arm/ieee754-sf.S	Mon Sep 8 12:57:46 2003
@@ -0,0 +1,815 @@
+/* ieee754-sf.S single-precision floating point support for ARM
+
+ Copyright (C) 2003 Free Software Foundation, Inc.
+ Contributed by Nicolas Pitre (nico@cam.org)
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ In addition to the permissions in the GNU General Public License, the
+ Free Software Foundation gives you unlimited permission to link the
+ compiled version of this file into combinations with other programs,
+ and to distribute those combinations without any restriction coming
+ from the use of this file. (The General Public License restrictions
+ do apply in other respects; for example, they cover modification of
+ the file, and distribution when not linked into a combine
+ executable.)
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to
+ the Free Software Foundation, 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/*
+ * Notes:
+ *
+ * The goal of this code is to be as fast as possible. This is
+ * not meant to be easy to understand for the casual reader.
+ *
+ * Only the default rounding mode is intended for best performances.
+ * Exceptions aren't supported yet, but that can be added quite easily
+ * if necessary without impacting performances.
+ */
+
+#ifdef L_negsf2
+
+ARM_FUNC_START negsf2
+	eor	r0, r0, #0x80000000	@ flip sign bit
+	RET
+
+	FUNC_END negsf2
+
+#endif
+
+#ifdef L_addsubsf3
+
+ARM_FUNC_START subsf3
+	eor	r1, r1, #0x80000000	@ flip sign bit of second arg
+#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+	b	1f			@ Skip Thumb-code prologue
+#endif
+
+ARM_FUNC_START addsf3
+
+1:	@ Compare both args, return zero if equal but the sign.
+	eor	r2, r0, r1
+	teq	r2, #0x80000000
+	beq	LSYM(Lad_z)
+
+	@ If first arg is 0 or -0, return second arg.
+	@ If second arg is 0 or -0, return first arg.
+	bics	r2, r0, #0x80000000
+	moveq	r0, r1
+	bicnes	r2, r1, #0x80000000
+	RETc(eq)
+
+	@ Mask out exponents.
+	mov	ip, #0xff000000
+	and	r2, r0, ip, lsr #1
+	and	r3, r1, ip, lsr #1
+
+	@ If either of them is 255, result will be INF or NAN
+	teq	r2, ip, lsr #1
+	teqne	r3, ip, lsr #1
+	beq	LSYM(Lad_i)
+
+	@ Compute exponent difference. Make largest exponent in r2,
+	@ corresponding arg in r0, and positive exponent difference in r3.
+	subs	r3, r3, r2
+	addgt	r2, r2, r3
+	eorgt	r1, r0, r1
+	eorgt	r0, r1, r0
+	eorgt	r1, r0, r1
+	rsblt	r3, r3, #0
+
+	@ If exponent difference is too large, return largest argument
+	@ already in r0. We need up to 25 bits to handle proper rounding
+	@ of 0x1p25 - 1.1.
+	cmp	r3, #(25 << 23)
+	RETc(hi)
+
+	@ Convert mantissa to signed integer.
+	tst	r0, #0x80000000
+	orr	r0, r0, #0x00800000
+	bic	r0, r0, #0xff000000
+	rsbne	r0, r0, #0
+	tst	r1, #0x80000000
+	orr	r1, r1, #0x00800000
+	bic	r1, r1, #0xff000000
+	rsbne	r1, r1, #0
+
+	@ If exponent == difference, one or both args were denormalized.
+	@ Since this is not common case, rescale them off line.
+	teq	r2, r3
+	beq	LSYM(Lad_d)
+LSYM(Lad_x):
+
+	@ Scale down second arg with exponent difference.
+	@ Apply shift one bit left to first arg and the rest to second arg
+	@ to simplify things later, but only if exponent does not become 0.
+	movs	r3, r3, lsr #23
+	teqne	r2, #(1 << 23)
+	movne	r0, r0, lsl #1
+	subne	r2, r2, #(1 << 23)
+	subne	r3, r3, #1
+
+	@ Shift second arg into ip, keep leftover bits into r1.
+	mov	ip, r1, asr r3
+	rsb	r3, r3, #32
+	mov	r1, r1, lsl r3
+
+	add	r0, r0, ip	@ the actual addition
+
+	@ We now have a 64 bit result in r0-r1.
+	@ Keep absolute value in r0-r1, sign in r3.
+	ands	r3, r0, #0x80000000
+	bpl	LSYM(Lad_p)
+	rsbs	r1, r1, #0
+	rsc	r0, r0, #0
+
+	@ Determine how to normalize the result.
+LSYM(Lad_p):
+	cmp	r0, #0x00800000
+	bcc	LSYM(Lad_l)
+	cmp	r0, #0x01000000
+	bcc	LSYM(Lad_r0)
+	cmp	r0, #0x02000000
+	bcc	LSYM(Lad_r1)
+
+	@ Result needs to be shifted right.
+	movs	r0, r0, lsr #1
+	mov	r1, r1, rrx
+	add	r2, r2, #(1 << 23)
+LSYM(Lad_r1):
+	movs	r0, r0, lsr #1
+	mov	r1, r1, rrx
+	add	r2, r2, #(1 << 23)
+
+	@ Our result is now properly aligned into r0, remaining bits in r1.
+	@ Round with MSB of r1. If halfway between two numbers, round towards
+	@ LSB of r0 = 0.
+LSYM(Lad_r0):
+	add	r0, r0, r1, lsr #31
+	teq	r1, #0x80000000
+	biceq	r0, r0, #1
+
+	@ Rounding may have added a new MSB. Adjust exponent.
+	@ That MSB will be cleared when exponent is merged below.
+	tst	r0, #0x01000000
+	addne	r2, r2, #(1 << 23)
+
+	@ Make sure we did not bust our exponent.
+	cmp	r2, #(254 << 23)
+	bhi	LSYM(Lad_o)
+
+	@ Pack final result together.
+LSYM(Lad_e):
+	bic	r0, r0, #0x01800000
+	orr	r0, r0, r2
+	orr	r0, r0, r3
+	RET
+
+	@ Result must be shifted left.
+	@ No rounding necessary since r1 will always be 0.
+LSYM(Lad_l):
+
+#if __ARM_ARCH__ < 5
+
+	movs	ip, r0, lsr #12
+	moveq	r0, r0, lsl #12
+	subeq	r2, r2, #(12 << 23)
+	tst	r0, #0x00ff0000
+	moveq	r0, r0, lsl #8
+	subeq	r2, r2, #(8 << 23)
+	tst	r0, #0x00f00000
+	moveq	r0, r0, lsl #4
+	subeq	r2, r2, #(4 << 23)
+	tst	r0, #0x00c00000
+	moveq	r0, r0, lsl #2
+	subeq	r2, r2, #(2 << 23)
+	tst	r0, #0x00800000
+	moveq	r0, r0, lsl #1
+	subeq	r2, r2, #(1 << 23)
+	cmp	r2, #0
+	bgt	LSYM(Lad_e)
+
+#else
+
+	clz	ip, r0
+	sub	ip, ip, #8
+	mov	r0, r0, lsl ip
+	subs	r2, r2, ip, lsl #23
+	bgt	LSYM(Lad_e)
+
+#endif
+
+	@ Exponent too small, denormalize result.
+	mvn	r2, r2, asr #23
+	add	r2, r2, #2
+	orr	r0, r3, r0, lsr r2
+	RET
+
+	@ Fixup and adjust bit position for denormalized arguments.
+	@ Note that r2 must not remain equal to 0.
+LSYM(Lad_d):
+	teq	r2, #0
+	eoreq	r0, r0, #0x00800000
+	addeq	r2, r2, #(1 << 23)
+	eor	r1, r1, #0x00800000
+	subne	r3, r3, #(1 << 23)
+	b	LSYM(Lad_x)
+
+	@ Result is x - x = 0, unless x is INF or NAN.
+LSYM(Lad_z):
+	mov	ip, #0xff000000
+	and	r2, r0, ip, lsr #1
+	teq	r2, ip, lsr #1
+	moveq	r0, ip, asr #2
+	movne	r0, #0
+	RET
+
+	@ Overflow: return INF.
+LSYM(Lad_o):
+	orr	r0, r3, #0x7f000000
+	orr	r0, r0, #0x00800000
+	RET
+
+	@ At least one of r0/r1 is INF/NAN.
+	@ if r0 != INF/NAN: return r1 (which is INF/NAN)
+	@ if r1 != INF/NAN: return r0 (which is INF/NAN)
+	@ if r0 or r1 is NAN: return NAN
+	@ if opposite sign: return NAN
+	@ return r0 (which is INF or -INF)
+LSYM(Lad_i):
+	teq	r2, ip, lsr #1
+	movne	r0, r1
+	teqeq	r3, ip, lsr #1
+	RETc(ne)
+	movs	r2, r0, lsl #9
+	moveqs	r2, r1, lsl #9
+	teqeq	r0, r1
+	orrne	r0, r3, #0x00400000	@ NAN
+	RET
+
+	FUNC_END addsf3
+	FUNC_END subsf3
+
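One trick worth calling out in addsf3 above: rather than branching on an effective subtraction, both mantissas are turned into signed integers, added, and the sign and magnitude of the sum recovered afterwards. A hedged C illustration (helper names invented; assumes the usual arithmetic >> on signed ints, mirroring the asr above):

#include <stdint.h>

/* Sketch of the signed-mantissa trick in addsf3 above. Each operand's
   24-bit mantissa (implicit leading 1 restored) is negated when the
   float is negative, so one plain addition covers both the add and
   the subtract case. */
static int32_t signed_mantissa(uint32_t f)
{
    /* orr r0, r0, #0x00800000 / bic r0, r0, #0xff000000 / rsbne r0, r0, #0 */
    int32_t m = (int32_t)((f & 0x007fffffu) | 0x00800000u);
    return (f & 0x80000000u) ? -m : m;
}

/* With exponents already aligned (difference in 'shift'), the actual
   addition then collapses to: */
int32_t aligned_sum_sketch(uint32_t a, uint32_t b, unsigned shift)
{
    return signed_mantissa(a) + (signed_mantissa(b) >> shift);
}

A negative sum means the result's sign flips; the asm also keeps the bits shifted out of the second operand (in r1) so the Lad_p/Lad_r0 paths can renormalize and round, which this sketch omits.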
ARM_FUNC_START floatunsisf
+	mov	r3, #0
+	b	1f
+
+ARM_FUNC_START floatsisf
+	ands	r3, r0, #0x80000000
+	rsbmi	r0, r0, #0
+
+1:	teq	r0, #0
+	RETc(eq)
+
+	mov	r1, #0
+	mov	r2, #((127 + 23) << 23)
+	tst	r0, #0xfc000000
+	beq	LSYM(Lad_p)
+
+	@ We need to scale the value a little before branching to code above.
+	tst	r0, #0xf0000000
+	movne	r1, r0, lsl #28
+	movne	r0, r0, lsr #4
+	addne	r2, r2, #(4 << 23)
+	tst	r0, #0x0c000000
+	beq	LSYM(Lad_p)
+	mov	r1, r1, lsr #2
+	orr	r1, r1, r0, lsl #30
+	mov	r0, r0, lsr #2
+	add	r2, r2, #(2 << 23)
+	b	LSYM(Lad_p)
+
+	FUNC_END floatsisf
+	FUNC_END floatunsisf
+
+#endif /* L_addsubsf3 */
+
+#ifdef L_muldivsf3
+
+ARM_FUNC_START mulsf3
+
+	@ Mask out exponents.
+	mov	ip, #0xff000000
+	and	r2, r0, ip, lsr #1
+	and	r3, r1, ip, lsr #1
+
+	@ Trap any INF/NAN.
+	teq	r2, ip, lsr #1
+	teqne	r3, ip, lsr #1
+	beq	LSYM(Lml_s)
+
+	@ Trap any multiplication by 0.
+	bics	ip, r0, #0x80000000
+	bicnes	ip, r1, #0x80000000
+	beq	LSYM(Lml_z)
+
+	@ Shift exponents right one bit to make room for overflow bit.
+	@ If either of them is 0, scale denormalized arguments off line.
+	@ Then add both exponents together.
+	movs	r2, r2, lsr #1
+	teqne	r3, #0
+	beq	LSYM(Lml_d)
+LSYM(Lml_x):
+	add	r2, r2, r3, asr #1
+
+	@ Preserve final sign in r2 along with exponent for now.
+	teq	r0, r1
+	orrmi	r2, r2, #0x8000
+
+	@ Convert mantissa to unsigned integer.
+	bic	r0, r0, #0xff000000
+	bic	r1, r1, #0xff000000
+	orr	r0, r0, #0x00800000
+	orr	r1, r1, #0x00800000
+
+#if __ARM_ARCH__ < 4
+
+	@ Well, no way to make it shorter without the umull instruction.
+	@ We must perform that 24 x 24 -> 48 bit multiplication by hand.
+	stmfd	sp!, {r4, r5}
+	mov	r4, r0, lsr #16
+	mov	r5, r1, lsr #16
+	bic	r0, r0, #0x00ff0000
+	bic	r1, r1, #0x00ff0000
+	mul	ip, r4, r5
+	mul	r3, r0, r1
+	mul	r0, r5, r0
+	mla	r0, r4, r1, r0
+	adds	r3, r3, r0, lsl #16
+	adc	ip, ip, r0, lsr #16
+	ldmfd	sp!, {r4, r5}
+
+#else
+
+	umull	r3, ip, r0, r1	@ The actual multiplication.
+
+#endif
+
+	@ Put final sign in r0.
+	mov	r0, r2, lsl #16
+	bic	r2, r2, #0x8000
+
+	@ Adjust result if one extra MSB appeared.
+	@ The LSB may be lost but this never changes the result in this case.
+	tst	ip, #(1 << 15)
+	addne	r2, r2, #(1 << 22)
+	movnes	ip, ip, lsr #1
+	movne	r3, r3, rrx
+
+	@ Apply exponent bias, check range for underflow.
+	subs	r2, r2, #(127 << 22)
+	ble	LSYM(Lml_u)
+
+	@ Scale back to 24 bits with rounding.
+	@ r0 contains sign bit already.
+	orrs	r0, r0, r3, lsr #23
+	adc	r0, r0, ip, lsl #9
+
+	@ If halfway between two numbers, rounding should be towards LSB = 0.
+	mov	r3, r3, lsl #9
+	teq	r3, #0x80000000
+	biceq	r0, r0, #1
+
+	@ Note: rounding may have produced an extra MSB here.
+	@ The extra bit is cleared before merging the exponent below.
+	tst	r0, #0x01000000
+	addne	r2, r2, #(1 << 22)
+
+	@ Check for exponent overflow
+	cmp	r2, #(255 << 22)
+	bge	LSYM(Lml_o)
+
+	@ Add final exponent.
+	bic	r0, r0, #0x01800000
+	orr	r0, r0, r2, lsl #1
+	RET
+
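On pre-ARMv4 cores there is no umull, so mulsf3 above builds the 48-bit mantissa product from 16-bit halves. The same computation in C, as a sketch (invented helper name; hi/lo receive what ip/r3 hold in the asm):

#include <stdint.h>

/* Sketch of the by-hand 24 x 24 -> 48 bit multiply above: split each
   24-bit mantissa into 16-bit halves and recombine partial products. */
void mul24x24_sketch(uint32_t a, uint32_t b, uint32_t *hi, uint32_t *lo)
{
    uint32_t a_hi = a >> 16, a_lo = a & 0xffffu;    /* mov r4 / bic r0 */
    uint32_t b_hi = b >> 16, b_lo = b & 0xffffu;    /* mov r5 / bic r1 */

    uint32_t low = a_lo * b_lo;                     /* mul r3, r0, r1 */
    uint32_t mid = a_hi * b_lo + a_lo * b_hi;       /* mul + mla: cross terms */

    *lo = low + (mid << 16);                        /* adds r3, r3, r0, lsl #16 */
    *hi = a_hi * b_hi + (mid >> 16) + (*lo < low);  /* adc ip, ip, r0, lsr #16 */
}

Because the inputs fit in 24 bits, the cross-term sum never overflows 32 bits, so the only carry to propagate is the one from the low-word addition, exactly what the adds/adc pair does above.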