📄 crosstool-0.38-softfloatlib.diff
++ bcc 1f @ value is too small
++ add ip, ip, #(31 << 20)
++ cmp r2, ip
++ bcs 3f @ value is too large
++
++ rsb r2, r2, ip
++ mov ip, xh, lsl #11
++ orr ip, ip, #0x80000000
++ orr ip, ip, xl, lsr #21
++ mov r2, r2, lsr #20
++ tst r3, #0x80000000 @ the sign bit
++ mov r0, ip, lsr r2
++ rsbne r0, r0, #0
++ RET
++
++1: mov r0, #0
++ RET
++
++2: orrs xl, xl, xh, lsl #12
++ bne 4f @ r0 is NAN.
++3: ands r0, r3, #0x80000000 @ the sign bit
++ moveq r0, #0x7fffffff @ maximum signed positive si
++ RET
++
++4: mov r0, #0 @ How should we convert NAN?
++ RET
++
++ FUNC_END fixdfsi
++
++#endif /* L_fixdfsi */
++
++#ifdef L_fixunsdfsi
++
++ARM_FUNC_START fixunsdfsi
++ orrs ip, xl, xh, lsl #1
++ movcss r0, #0 @ value is negative
++ RETc(eq) @ or 0 (xl, xh overlap r0)
++
++ @ check exponent range.
++ mov ip, #0x7f000000
++ orr ip, ip, #0x00f00000
++ and r2, xh, ip
++ teq r2, ip
++ beq 2f @ value is INF or NAN
++ bic ip, ip, #0x40000000
++ cmp r2, ip
++ bcc 1f @ value is too small
++ add ip, ip, #(31 << 20)
++ cmp r2, ip
++ bhi 3f @ value is too large
++
++ rsb r2, r2, ip
++ mov ip, xh, lsl #11
++ orr ip, ip, #0x80000000
++ orr ip, ip, xl, lsr #21
++ mov r2, r2, lsr #20
++ mov r0, ip, lsr r2
++ RET
++
++1: mov r0, #0
++ RET
++
++2: orrs xl, xl, xh, lsl #12
++ bne 4f @ value is NAN.
++3: mov r0, #0xffffffff @ maximum unsigned si
++ RET
++
++4: mov r0, #0 @ How should we convert NAN?
++ RET
++
++ FUNC_END fixunsdfsi
++
++#endif /* L_fixunsdfsi */
++
++#ifdef L_truncdfsf2
++
++ARM_FUNC_START truncdfsf2
++ orrs r2, xl, xh, lsl #1
++ moveq r0, r2, rrx
++ RETc(eq) @ value is 0.0 or -0.0
++
++ @ check exponent range.
++ mov ip, #0x7f000000
++ orr ip, ip, #0x00f00000
++ and r2, ip, xh
++ teq r2, ip
++ beq 2f @ value is INF or NAN
++ bic xh, xh, ip
++ cmp r2, #(0x380 << 20)
++ bls 4f @ value is too small
++
++ @ shift and round mantissa
++1: movs r3, xl, lsr #29
++ adc r3, r3, xh, lsl #3
++
++ @ if halfway between two numbers, round towards LSB = 0.
++ mov xl, xl, lsl #3
++ teq xl, #0x80000000
++ biceq r3, r3, #1
++
++ @ rounding might have created an extra MSB. If so adjust exponent.
++ tst r3, #0x00800000
++ addne r2, r2, #(1 << 20)
++ bicne r3, r3, #0x00800000
++
++ @ check exponent for overflow
++ mov ip, #(0x400 << 20)
++ orr ip, ip, #(0x07f << 20)
++ cmp r2, ip
++ bcs 3f @ overflow
++
++ @ adjust exponent, merge with sign bit and mantissa.
++ movs xh, xh, lsl #1
++ mov r2, r2, lsl #4
++ orr r0, r3, r2, rrx
++ eor r0, r0, #0x40000000
++ RET
++
++2: @ chech for NAN
++ orrs xl, xl, xh, lsl #12
++ movne r0, #0x7f000000
++ orrne r0, r0, #0x00c00000
++ RETc(ne) @ return NAN
++
++3: @ return INF with sign
++ and r0, xh, #0x80000000
++ orr r0, r0, #0x7f000000
++ orr r0, r0, #0x00800000
++ RET
++
++4: @ check if denormalized value is possible
++ subs r2, r2, #((0x380 - 24) << 20)
++ andle r0, xh, #0x80000000 @ too small, return signed 0.
++ RETc(le)
++
++ @ denormalize value so we can resume with the code above afterwards.
++ orr xh, xh, #0x00100000
++ mov r2, r2, lsr #20
++ rsb r2, r2, #25
++ cmp r2, #20
++ bgt 6f
++
++ rsb ip, r2, #32
++ mov r3, xl, lsl ip
++ mov xl, xl, lsr r2
++ orr xl, xl, xh, lsl ip
++ movs xh, xh, lsl #1
++ mov xh, xh, lsr r2
++ mov xh, xh, rrx
++5: teq r3, #0 @ fold r3 bits into the LSB
++ orrne xl, xl, #1 @ for rounding considerations.
++ mov r2, #(0x380 << 20) @ equivalent to the 0 float exponent
++ b 1b
++
++6: rsb r2, r2, #(12 + 20)
++ rsb ip, r2, #32
++ mov r3, xl, lsl r2
++ mov xl, xl, lsr ip
++ orr xl, xl, xh, lsl r2
++ and xh, xh, #0x80000000
++ b 5b
++
++ FUNC_END truncdfsf2
++
++#endif /* L_truncdfsf2 */
+diff -urN gcc-3.3.2-be/gcc/config/arm/ieee754-sf.S gcc-3.3.2-be-sf/gcc/config/arm/ieee754-sf.S
+--- gcc-3.3.2-be/gcc/config/arm/ieee754-sf.S 1970-01-01 01:00:00.000000000 +0100
++++ gcc-3.3.2-be-sf/gcc/config/arm/ieee754-sf.S 2005-06-09 13:11:49.000000000 +0200
+@@ -0,0 +1,815 @@
++/* ieee754-sf.S single-precision floating point support for ARM
++
++ Copyright (C) 2003 Free Software Foundation, Inc.
++ Contributed by Nicolas Pitre (nico@cam.org)
++
++ This file is free software; you can redistribute it and/or modify it
++ under the terms of the GNU General Public License as published by the
++ Free Software Foundation; either version 2, or (at your option) any
++ later version.
++
++ In addition to the permissions in the GNU General Public License, the
++ Free Software Foundation gives you unlimited permission to link the
++ compiled version of this file into combinations with other programs,
++ and to distribute those combinations without any restriction coming
++ from the use of this file. (The General Public License restrictions
++ do apply in other respects; for example, they cover modification of
++ the file, and distribution when not linked into a combine
++ executable.)
++
++ This file is distributed in the hope that it will be useful, but
++ WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with this program; see the file COPYING. If not, write to
++ the Free Software Foundation, 59 Temple Place - Suite 330,
++ Boston, MA 02111-1307, USA. */
++
++/*
++ * Notes:
++ *
++ * The goal of this code is to be as fast as possible. This is
++ * not meant to be easy to understand for the casual reader.
++ *
++ * Only the default rounding mode is intended for best performances.
++ * Exceptions aren't supported yet, but that can be added quite easily
++ * if necessary without impacting performances.
++ */
++
++#ifdef L_negsf2
++
++ARM_FUNC_START negsf2
++ eor r0, r0, #0x80000000 @ flip sign bit
++ RET
++
++ FUNC_END negsf2
++
++#endif
++
++#ifdef L_addsubsf3
++
++ARM_FUNC_START subsf3
++ eor r1, r1, #0x80000000 @ flip sign bit of second arg
++#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
++ b 1f @ Skip Thumb-code prologue
++#endif
++
++ARM_FUNC_START addsf3
++
++1: @ Compare both args, return zero if equal but the sign.
++ eor r2, r0, r1
++ teq r2, #0x80000000
++ beq LSYM(Lad_z)
++
++ @ If first arg is 0 or -0, return second arg.
++ @ If second arg is 0 or -0, return first arg.
++ bics r2, r0, #0x80000000
++ moveq r0, r1
++ bicnes r2, r1, #0x80000000
++ RETc(eq)
++
++ @ Mask out exponents.
++ mov ip, #0xff000000
++ and r2, r0, ip, lsr #1
++ and r3, r1, ip, lsr #1
++
++ @ If either of them is 255, result will be INF or NAN
++ teq r2, ip, lsr #1
++ teqne r3, ip, lsr #1
++ beq LSYM(Lad_i)
++
++ @ Compute exponent difference. Make largest exponent in r2,
++ @ corresponding arg in r0, and positive exponent difference in r3.
++ subs r3, r3, r2
++ addgt r2, r2, r3
++ eorgt r1, r0, r1
++ eorgt r0, r1, r0
++ eorgt r1, r0, r1
++ rsblt r3, r3, #0
++
++ @ If exponent difference is too large, return largest argument
++ @ already in r0. We need up to 25 bit to handle proper rounding
++ @ of 0x1p25 - 1.1.
++ cmp r3, #(25 << 23)
++ RETc(hi)
++
++ @ Convert mantissa to signed integer.
++ tst r0, #0x80000000
++ orr r0, r0, #0x00800000
++ bic r0, r0, #0xff000000
++ rsbne r0, r0, #0
++ tst r1, #0x80000000
++ orr r1, r1, #0x00800000
++ bic r1, r1, #0xff000000
++ rsbne r1, r1, #0
++
++ @ If exponent == difference, one or both args were denormalized.
++ @ Since this is not common case, rescale them off line.
++ teq r2, r3
++ beq LSYM(Lad_d)
++LSYM(Lad_x):
++
++ @ Scale down second arg with exponent difference.
++ @ Apply shift one bit left to first arg and the rest to second arg
++ @ to simplify things later, but only if exponent does not become 0.
++ movs r3, r3, lsr #23
++ teqne r2, #(1 << 23)
++ movne r0, r0, lsl #1
++ subne r2, r2, #(1 << 23)
++ subne r3, r3, #1
++
++ @ Shift second arg into ip, keep leftover bits into r1.
++ mov ip, r1, asr r3
++ rsb r3, r3, #32
++ mov r1, r1, lsl r3
++
++ add r0, r0, ip @ the actual addition
++
++ @ We now have a 64 bit result in r0-r1.
++ @ Keep absolute value in r0-r1, sign in r3.
++ ands r3, r0, #0x80000000
++ bpl LSYM(Lad_p)
++ rsbs r1, r1, #0
++ rsc r0, r0, #0
++
++ @ Determine how to normalize the result.
++LSYM(Lad_p):
++ cmp r0, #0x00800000
++ bcc LSYM(Lad_l)
++ cmp r0, #0x01000000
++ bcc LSYM(Lad_r0)
++ cmp r0, #0x02000000
++ bcc LSYM(Lad_r1)
++
++ @ Result needs to be shifted right.
++ movs r0, r0, lsr #1
++ mov r1, r1, rrx
++ add r2, r2, #(1 << 23)
++LSYM(Lad_r1):
++ movs r0, r0, lsr #1
++ mov r1, r1, rrx
++ add r2, r2, #(1 << 23)
++
++ @ Our result is now properly aligned into r0, remaining bits in r1.
++ @ Round with MSB of r1. If halfway between two numbers, round towards
++ @ LSB of r0 = 0.
++LSYM(Lad_r0):
++ add r0, r0, r1, lsr #31
++ teq r1, #0x80000000
++ biceq r0, r0, #1
++
++ @ Rounding may have added a new MSB. Adjust exponent.
++ @ That MSB will be cleared when exponent is merged below.
++ tst r0, #0x01000000
++ addne r2, r2, #(1 << 23)
++
++ @ Make sure we did not bust our exponent.
++ cmp r2, #(254 << 23)
++ bhi LSYM(Lad_o)
++
++ @ Pack final result together.
++LSYM(Lad_e):
++ bic r0, r0, #0x01800000
++ orr r0, r0, r2
++ orr r0, r0, r3
++ RET
++
++ @ Result must be shifted left.
++ @ No rounding necessary since r1 will always be 0.
++LSYM(Lad_l):
++
++#if __ARM_ARCH__ < 5
++
++ movs ip, r0, lsr #12
++ moveq r0, r0, lsl #12
++ subeq r2, r2, #(12 << 23)
++ tst r0, #0x00ff0000
++ moveq r0, r0, lsl #8
++ subeq r2, r2, #(8 << 23)
++ tst r0, #0x00f00000
++ moveq r0, r0, lsl #4
++ subeq r2, r2, #(4 << 23)
++ tst r0, #0x00c00000
++ moveq r0, r0, lsl #2
++ subeq r2, r2, #(2 << 23)
++ tst r0, #0x00800000
++ moveq r0, r0, lsl #1
++ subeq r2, r2, #(1 << 23)
++ cmp r2, #0
++ bgt LSYM(Lad_e)
++
++#else
++
++ clz ip, r0
++ sub ip, ip, #8
++ mov r0, r0, lsl ip
++ subs r2, r2, ip, lsl #23
++ bgt LSYM(Lad_e)
++
++#endif
++
++ @ Exponent too small, denormalize result.
++ mvn r2, r2, asr #23
++ add r2, r2, #2
++ orr r0, r3, r0, lsr r2
++ RET
++
++ @ Fixup and adjust bit position for denormalized arguments.
++ @ Note that r2 must not remain equal to 0.
++LSYM(Lad_d):
++ teq r2, #0
++ eoreq r0, r0, #0x00800000
++ addeq r2, r2, #(1 << 23)
++ eor r1, r1, #0x00800000
++ subne r3, r3, #(1 << 23)
++ b LSYM(Lad_x)
++
++ @ Result is x - x = 0, unless x is INF or NAN.
++LSYM(Lad_z):
++ mov ip, #0xff000000
++ and r2, r0, ip, lsr #1
++ teq r2, ip, lsr #1
++ moveq r0, ip, asr #2
++ movne r0, #0
++ RET
++
++ @ Overflow: return INF.
++LSYM(Lad_o):
++ orr r0, r3, #0x7f000000
++ orr r0, r0, #0x00800000
++ RET
++
++ @ At least one of r0/r1 is INF/NAN.
++ @ if r0 != INF/NAN: return r1 (which is INF/NAN)
++ @ if r1 != INF/NAN: return r0 (which is INF/NAN)
++ @ if r0 or r1 is NAN: return NAN
++ @ if opposite sign: return NAN
++ @ return r0 (which is INF or -INF)
++LSYM(Lad_i):
++ teq r2, ip, lsr #1
++ movne r0, r1
++ teqeq r3, ip, lsr #1
++ RETc(ne)
++ movs r2, r0, lsl #9
++ moveqs r2, r1, lsl #9
++ teqeq r0, r1
++ orrne r0, r3, #0x00400000 @ NAN
++ RET
++
++ FUNC_END addsf3
++ FUNC_END subsf3
++
++ARM_FUNC_START floatunsisf
++ mov r3, #0
++ b 1f
++
++ARM_FUNC_START floatsisf
++ ands r3, r0, #0x80000000
++ rsbmi r0, r0, #0
++
++1: teq r0, #0
++ RETc(eq)
++
++ mov r1, #0
++ mov r2, #((127 + 23) << 23)
++ tst r0, #0xfc000000
++ beq LSYM(Lad_p)
++
++ @ We need to scale the value a little before branching to code above.
++ tst r0, #0xf0000000
++ movne r1, r0, lsl #28
++ movne r0, r0, lsr #4
++ addne r2, r2, #(4 << 23)
++ tst r0, #0x0c000000
++ beq LSYM(Lad_p)
++ mov r1, r1, lsr #2
++ orr r1, r1, r0, lsl #30
++ mov r0, r0, lsr #2
++ add r2, r2, #(2 << 23)
++ b LSYM(Lad_p)
++
++ FUNC_END floatsisf
++ FUNC_END floatunsisf
++
++#endif /* L_addsubsf3 */
++
++#ifdef L_muldivsf3
++
++ARM_FUNC_START mulsf3
++
++ @ Mask out exponents.
++ mov ip, #0xff000000
++ and r2, r0, ip, lsr #1
++ and r3, r1, ip, lsr #1
++
++ @ Trap any INF/NAN.
++ teq r2, ip, lsr #1
++ teqne r3, ip, lsr #1
++ beq LSYM(Lml_s)
++
++ @ Trap any multiplication by 0.
++ bics ip, r0, #0x80000000
++ bicnes ip, r1, #0x80000000
++ beq LSYM(Lml_z)
++
++ @ Shift exponents right one bit to make room for overflow bit.
++ @ If either of them is 0, scale denormalized arguments off line.
++ @ Then add both exponents together.
++ movs r2, r2, lsr #1
++ teqne r3, #0
++ beq LSYM(Lml_d)
++LSYM(Lml_x):
++ add r2, r2, r3, asr #1
++
++ @ Preserve final sign in r2 along with exponent for now.
++ teq r0, r1
++ orrmi r2, r2, #0x8000
++
++ @ Convert mantissa to unsigned integer.
++ bic r0, r0, #0xff000000
++ bic r1, r1, #0xff000000
++ orr r0, r0, #0x00800000
++ orr r1, r1, #0x00800000
++
++#if __ARM_ARCH__ < 4
++
++ @ Well, no way to make it shorter without the umull instruction.
++ @ We must perform that 24 x 24 -> 48 bit multiplication by hand.
++ stmfd sp!, {r4, r5}
++ mov r4, r0, lsr #16
++ mov r5, r1, lsr #16
++ bic r0, r0, #0x00ff0000
++ bic r1, r1, #0x00ff0000
++ mul ip, r4, r5
++ mul r3, r0, r1
++ mul r0, r5, r0
++ mla r0, r4, r1, r0
++ adds r3, r3, r0, lsl #16
++ adc ip, ip, r0, lsr #16
++ ldmfd sp!, {r4, r5}
++
++#else
++
++ umull r3, ip, r0, r1 @ The actual multiplication.
++
++#endif
++
++ @ Put final sign in r0.
++ mov r0, r2, lsl #16
++ bic r2, r2, #0x8000
++
++ @ Adjust result if one extra MSB appeared.
++ @ The LSB may be lost but this never changes the result in this case.
++ tst ip, #(1 << 15)
++ addne r2, r2, #(1 << 22)
++ movnes ip, ip, lsr #1
++ movne r3, r3, rrx
++
++ @ Apply exponent bias, check range for underflow.
++ subs r2, r2, #(127 << 22)
++ ble LSYM(Lml_u)
++
++ @ Scale back to 24 bits with rounding.
++ @ r0 contains sign bit already.
++ orrs r0, r0, r3, lsr #23
++ adc r0, r0, ip, lsl #9
++
++ @ If halfway between two numbers, rounding should be towards LSB = 0.
++ mov r3, r3, lsl #9
++ teq r3, #0x80000000
++ biceq r0, r0, #1
++
++ @ Note: rounding may have produced an extra MSB here.
++ @ The extra bit is cleared before merging the exponent below.
++ tst r0, #0x01000000
++ addne r2, r2, #(1 << 22)
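
Reader's note, not part of the patch: the "if halfway between two numbers, round towards LSB = 0" comments that recur in truncdfsf2, addsf3 and mulsf3 describe IEEE round-to-nearest-even. A minimal C sketch of that step is given below; round_nearest_even is a hypothetical helper written only to illustrate the idea, not anything defined by this patch or by libgcc.

#include <stdint.h>

/* Keep the bits of 'mant' above the lowest 'discard' bits (assume
   1 <= discard < 64), rounding to nearest, ties to even.  This mirrors
   the assembly: add the guard bit, then clear the result LSB when the
   discarded bits were exactly one half. */
static uint32_t round_nearest_even(uint64_t mant, unsigned discard)
{
    uint64_t half = 1ULL << (discard - 1);     /* weight of the guard bit */
    uint64_t rest = mant & ((half << 1) - 1);  /* bits being thrown away  */
    uint32_t out  = (uint32_t)(mant >> discard);

    if (rest > half)
        out += 1;            /* more than half: round up                  */
    else if (rest == half)
        out += out & 1;      /* exactly half: round so the LSB becomes 0  */
    return out;              /* caller must still handle a carry into the
                                next bit (the "extra MSB" case above)     */
}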