@ ieee754-df.S -- double-precision IEEE-754 floating-point support for ARM.
@ Provenance note (translated from Chinese): taken from "Mamaich's Pocket GCC
@ compiler (runs on Pocket PC)" -- .S source, 1,226 lines total, page 1 of 2;
@ the file is truncated at the end of this page.
/* ieee754-df.S double-precision floating point support for ARM

   Copyright (C) 2003, 2004  Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   In addition to the permissions in the GNU General Public License, the
   Free Software Foundation gives you unlimited permission to link the
   compiled version of this file into combinations with other programs,
   and to distribute those combinations without any restriction coming
   from the use of this file.  (The General Public License restrictions
   do apply in other respects; for example, they cover modification of
   the file, and distribution when not linked into a combine
   executable.)

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 * For slightly simpler code please see the single precision version
 * of this file.
 *
 * Only the default rounding mode is intended for best performances.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performances.
*/@ For FPA, float words are always big-endian.@ For VFP, floats words follow the memory system mode.#if 1/* defined(__VFP_FP__) && !defined(__ARMEB__) */#define xl r0#define xh r1#define yl r2#define yh r3#else#define xh r0#define xl r1#define yh r2#define yl r3#endif#ifdef L_negdf2ARM_FUNC_START negdf2 @ flip sign bit eor xh, xh, #0x80000000 RET FUNC_END negdf2#endif#ifdef L_addsubdf3ARM_FUNC_START subdf3 @ flip sign bit of second arg eor yh, yh, #0x80000000#if defined(__thumb__) && !defined(__THUMB_INTERWORK__) b 1f @ Skip Thumb-code prologue#endifARM_FUNC_START adddf31: @ Compare both args, return zero if equal but the sign. teq xl, yl eoreq ip, xh, yh teqeq ip, #0x80000000 beq LSYM(Lad_z) @ If first arg is 0 or -0, return second arg. @ If second arg is 0 or -0, return first arg. orrs ip, xl, xh, lsl #1 moveq xl, yl moveq xh, yh orrnes ip, yl, yh, lsl #1 RETc(eq) stmfd sp!, {r4, r5, lr} @ Mask out exponents. mov ip, #0x7f000000 orr ip, ip, #0x00f00000 and r4, xh, ip and r5, yh, ip @ If either of them is 0x7ff, result will be INF or NAN teq r4, ip teqne r5, ip beq LSYM(Lad_i) @ Compute exponent difference. Make largest exponent in r4, @ corresponding arg in xh-xl, and positive exponent difference in r5. subs r5, r5, r4 rsblt r5, r5, #0 ble 1f add r4, r4, r5 eor yl, xl, yl eor yh, xh, yh eor xl, yl, xl eor xh, yh, xh eor yl, xl, yl eor yh, xh, yh1: @ If exponent difference is too large, return largest argument @ already in xh-xl. We need up to 54 bit to handle proper rounding @ of 0x1p54 - 1.1. cmp r5, #(54 << 20) RETLDM "r4, r5" hi @ Convert mantissa to signed integer. tst xh, #0x80000000 bic xh, xh, ip, lsl #1 orr xh, xh, #0x00100000 beq 1f rsbs xl, xl, #0 rsc xh, xh, #01: tst yh, #0x80000000 bic yh, yh, ip, lsl #1 orr yh, yh, #0x00100000 beq 1f rsbs yl, yl, #0 rsc yh, yh, #01: @ If exponent == difference, one or both args were denormalized. @ Since this is not common case, rescale them off line. 
teq r4, r5 beq LSYM(Lad_d)LSYM(Lad_x): @ Scale down second arg with exponent difference. @ Apply shift one bit left to first arg and the rest to second arg @ to simplify things later, but only if exponent does not become 0. mov ip, #0 movs r5, r5, lsr #20 beq 3f teq r4, #(1 << 20) beq 1f movs xl, xl, lsl #1 adc xh, ip, xh, lsl #1 sub r4, r4, #(1 << 20) subs r5, r5, #1 beq 3f @ Shift yh-yl right per r5, keep leftover bits into ip.1: rsbs lr, r5, #32 blt 2f mov ip, yl, lsl lr mov yl, yl, lsr r5 orr yl, yl, yh, lsl lr mov yh, yh, asr r5 b 3f2: sub r5, r5, #32 add lr, lr, #32 cmp yl, #1 adc ip, ip, yh, lsl lr mov yl, yh, asr r5 mov yh, yh, asr #323: @ the actual addition adds xl, xl, yl adc xh, xh, yh @ We now have a result in xh-xl-ip. @ Keep absolute value in xh-xl-ip, sign in r5. ands r5, xh, #0x80000000 bpl LSYM(Lad_p) rsbs ip, ip, #0 rscs xl, xl, #0 rsc xh, xh, #0 @ Determine how to normalize the result.LSYM(Lad_p): cmp xh, #0x00100000 bcc LSYM(Lad_l) cmp xh, #0x00200000 bcc LSYM(Lad_r0) cmp xh, #0x00400000 bcc LSYM(Lad_r1) @ Result needs to be shifted right. movs xh, xh, lsr #1 movs xl, xl, rrx movs ip, ip, rrx orrcs ip, ip, #1 add r4, r4, #(1 << 20)LSYM(Lad_r1): movs xh, xh, lsr #1 movs xl, xl, rrx movs ip, ip, rrx orrcs ip, ip, #1 add r4, r4, #(1 << 20) @ Our result is now properly aligned into xh-xl, remaining bits in ip. @ Round with MSB of ip. If halfway between two numbers, round towards @ LSB of xl = 0.LSYM(Lad_r0): adds xl, xl, ip, lsr #31 adc xh, xh, #0 teq ip, #0x80000000 biceq xl, xl, #1 @ One extreme rounding case may add a new MSB. Adjust exponent. @ That MSB will be cleared when exponent is merged below. tst xh, #0x00200000 addne r4, r4, #(1 << 20) @ Make sure we did not bust our exponent. adds ip, r4, #(1 << 20) bmi LSYM(Lad_o) @ Pack final result together.LSYM(Lad_e): bic xh, xh, #0x00300000 orr xh, xh, r4 orr xh, xh, r5 RETLDM "r4, r5"LSYM(Lad_l): @ Result must be shifted left and exponent adjusted. 
@ No rounding necessary since ip will always be 0.#if __ARM_ARCH__ < 5 teq xh, #0 movne r3, #-11 moveq r3, #21 moveq xh, xl moveq xl, #0 mov r2, xh movs ip, xh, lsr #16 moveq r2, r2, lsl #16 addeq r3, r3, #16 tst r2, #0xff000000 moveq r2, r2, lsl #8 addeq r3, r3, #8 tst r2, #0xf0000000 moveq r2, r2, lsl #4 addeq r3, r3, #4 tst r2, #0xc0000000 moveq r2, r2, lsl #2 addeq r3, r3, #2 tst r2, #0x80000000 addeq r3, r3, #1#else teq xh, #0 moveq xh, xl moveq xl, #0 clz r3, xh addeq r3, r3, #32 sub r3, r3, #11#endif @ determine how to shift the value. subs r2, r3, #32 bge 2f adds r2, r2, #12 ble 1f @ shift value left 21 to 31 bits, or actually right 11 to 1 bits @ since a register switch happened above. add ip, r2, #20 rsb r2, r2, #12 mov xl, xh, lsl ip mov xh, xh, lsr r2 b 3f @ actually shift value left 1 to 20 bits, which might also represent @ 32 to 52 bits if counting the register switch that happened earlier.1: add r2, r2, #202: rsble ip, r2, #32 mov xh, xh, lsl r2 orrle xh, xh, xl, lsr ip movle xl, xl, lsl r2 @ adjust exponent accordingly.3: subs r4, r4, r3, lsl #20 bgt LSYM(Lad_e) @ Exponent too small, denormalize result. @ Find out proper shift value. mvn r4, r4, asr #20 subs r4, r4, #30 bge 2f adds r4, r4, #12 bgt 1f @ shift result right of 1 to 20 bits, sign is in r5. 
add r4, r4, #20 rsb r2, r4, #32 mov xl, xl, lsr r4 orr xl, xl, xh, lsl r2 orr xh, r5, xh, lsr r4 RETLDM "r4, r5" @ shift result right of 21 to 31 bits, or left 11 to 1 bits after @ a register switch from xh to xl.1: rsb r4, r4, #12 rsb r2, r4, #32 mov xl, xl, lsr r2 orr xl, xl, xh, lsl r4 mov xh, r5 RETLDM "r4, r5" @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch @ from xh to xl.2: mov xl, xh, lsr r4 mov xh, r5 RETLDM "r4, r5" @ Adjust exponents for denormalized arguments.LSYM(Lad_d): teq r4, #0 eoreq xh, xh, #0x00100000 addeq r4, r4, #(1 << 20) eor yh, yh, #0x00100000 subne r5, r5, #(1 << 20) b LSYM(Lad_x) @ Result is x - x = 0, unless x = INF or NAN.LSYM(Lad_z): sub ip, ip, #0x00100000 @ ip becomes 0x7ff00000 and r2, xh, ip teq r2, ip orreq xh, ip, #0x00080000 movne xh, #0 mov xl, #0 RET @ Overflow: return INF.LSYM(Lad_o): orr xh, r5, #0x7f000000 orr xh, xh, #0x00f00000 mov xl, #0 RETLDM "r4, r5" @ At least one of x or y is INF/NAN. @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) @ if either is NAN: return NAN @ if opposite sign: return NAN @ return xh-xl (which is INF or -INF)LSYM(Lad_i): teq r4, ip movne xh, yh movne xl, yl teqeq r5, ip RETLDM "r4, r5" ne orrs r4, xl, xh, lsl #12 orreqs r4, yl, yh, lsl #12 teqeq xh, yh orrne xh, r5, #0x00080000 movne xl, #0 RETLDM "r4, r5" FUNC_END subdf3 FUNC_END adddf3ARM_FUNC_START floatunsidf teq r0, #0 moveq r1, #0 RETc(eq) stmfd sp!, {r4, r5, lr} mov r4, #(0x400 << 20) @ initial exponent add r4, r4, #((52-1) << 20) mov r5, #0 @ sign bit is 0 mov xl, r0 mov xh, #0 b LSYM(Lad_l) FUNC_END floatunsidfARM_FUNC_START floatsidf teq r0, #0 moveq r1, #0 RETc(eq) stmfd sp!, {r4, r5, lr} mov r4, #(0x400 << 20) @ initial exponent add r4, r4, #((52-1) << 20) ands r5, r0, #0x80000000 @ sign bit in r5 rsbmi r0, r0, #0 @ absolute value mov xl, r0 mov xh, #0 b LSYM(Lad_l) FUNC_END floatsidfARM_FUNC_START extendsfdf2 movs r2, r0, lsl #1 beq 1f @ value is 0.0 
or -0.0 mov xh, r2, asr #3 @ stretch exponent mov xh, xh, rrx @ retrieve sign bit mov xl, r2, lsl #28 @ retrieve remaining bits ands r2, r2, #0xff000000 @ isolate exponent beq 2f @ exponent was 0 but not mantissa teq r2, #0xff000000 @ check if INF or NAN eorne xh, xh, #0x38000000 @ fixup exponent otherwise. RET1: mov xh, r0 mov xl, #0 RET2: @ value was denormalized. We can normalize it now. stmfd sp!, {r4, r5, lr} mov r4, #(0x380 << 20) @ setup corresponding exponent add r4, r4, #(1 << 20) and r5, xh, #0x80000000 @ move sign bit in r5 bic xh, xh, #0x80000000 b LSYM(Lad_l) FUNC_END extendsfdf2#endif /* L_addsubdf3 */#ifdef L_muldivdf3ARM_FUNC_START muldf3 stmfd sp!, {r4, r5, r6, lr} @ Mask out exponents. mov ip, #0x7f000000 orr ip, ip, #0x00f00000 and r4, xh, ip and r5, yh, ip @ Trap any INF/NAN. teq r4, ip teqne r5, ip beq LSYM(Lml_s) @ Trap any multiplication by 0. orrs r6, xl, xh, lsl #1 orrnes r6, yl, yh, lsl #1 beq LSYM(Lml_z) @ Shift exponents right one bit to make room for overflow bit. @ If either of them is 0, scale denormalized arguments off line. @ Then add both exponents together. movs r4, r4, lsr #1 teqne r5, #0 beq LSYM(Lml_d)LSYM(Lml_x): add r4, r4, r5, asr #1 @ Preserve final sign in r4 along with exponent for now. teq xh, yh orrmi r4, r4, #0x8000 @ Convert mantissa to unsigned integer. bic xh, xh, ip, lsl #1 bic yh, yh, ip, lsl #1 orr xh, xh, #0x00100000 orr yh, yh, #0x00100000#if __ARM_ARCH__ < 4 @ Well, no way to make it shorter without the umull instruction. @ We must perform that 53 x 53 bit multiplication by hand. 
stmfd sp!, {r7, r8, r9, sl, fp} mov r7, xl, lsr #16 mov r8, yl, lsr #16 mov r9, xh, lsr #16 mov sl, yh, lsr #16 bic xl, xl, r7, lsl #16 bic yl, yl, r8, lsl #16 bic xh, xh, r9, lsl #16 bic yh, yh, sl, lsl #16 mul ip, xl, yl mul fp, xl, r8 mov lr, #0 adds ip, ip, fp, lsl #16 adc lr, lr, fp, lsr #16 mul fp, r7, yl adds ip, ip, fp, lsl #16 adc lr, lr, fp, lsr #16 mul fp, xl, sl mov r5, #0 adds lr, lr, fp, lsl #16 adc r5, r5, fp, lsr #16 mul fp, r7, yh adds lr, lr, fp, lsl #16 adc r5, r5, fp, lsr #16 mul fp, xh, r8 adds lr, lr, fp, lsl #16 adc r5, r5, fp, lsr #16 mul fp, r9, yl adds lr, lr, fp, lsl #16 adc r5, r5, fp, lsr #16 mul fp, xh, sl mul r6, r9, sl adds r5, r5, fp, lsl #16 adc r6, r6, fp, lsr #16 mul fp, r9, yh adds r5, r5, fp, lsl #16 adc r6, r6, fp, lsr #16 mul fp, xl, yh adds lr, lr, fp mul fp, r7, sl adcs r5, r5, fp mul fp, xh, yl adc r6, r6, #0 adds lr, lr, fp mul fp, r9, r8 adcs r5, r5, fp mul fp, r7, r8 adc r6, r6, #0 adds lr, lr, fp mul fp, xh, yh adcs r5, r5, fp adc r6, r6, #0 ldmfd sp!, {r7, r8, r9, sl, fp}#else @ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits. umull ip, lr, xl, yl mov r5, #0 umlal lr, r5, xl, yh umlal lr, r5, xh, yl mov r6, #0 umlal r5, r6, xh, yh#endif @ The LSBs in ip are only significant for the final rounding. @ Fold them into one bit of lr. teq ip, #0 orrne lr, lr, #1 @ Put final sign in xh. mov xh, r4, lsl #16 bic r4, r4, #0x8000 @ Adjust result if one extra MSB appeared (one of four times). tst r6, #(1 << 9) beq 1f add r4, r4, #(1 << 19) movs r6, r6, lsr #1 movs r5, r5, rrx movs lr, lr, rrx orrcs lr, lr, #11: @ Scale back to 53 bits. @ xh contains sign bit already. orr xh, xh, r6, lsl #12 orr xh, xh, r5, lsr #20 mov xl, r5, lsl #12 orr xl, xl, lr, lsr #20 @ Apply exponent bias, check range for underflow. sub r4, r4, #0x00f80000 subs r4, r4, #0x1f000000 ble LSYM(Lml_u) @ Round the result. 
movs lr, lr, lsl #12 bpl 1f adds xl, xl, #1 adc xh, xh, #0 teq lr, #0x80000000 biceq xl, xl, #1 @ Rounding may have produced an extra MSB here. @ The extra bit is cleared before merging the exponent below. tst xh, #0x00200000 addne r4, r4, #(1 << 19)1: @ Check exponent for overflow. adds ip, r4, #(1 << 19) tst ip, #(1 << 30) bne LSYM(Lml_o) @ Add final exponent. bic xh, xh, #0x00300000 orr xh, xh, r4, lsl #1 RETLDM "r4, r5, r6" @ Result is 0, but determine sign anyway.LSYM(Lml_z): eor xh, xh, yhLSYM(Ldv_z): bic xh, xh, #0x7fffffff mov xl, #0 RETLDM "r4, r5, r6" @ Check if denormalized result is possible, otherwise return signed 0.LSYM(Lml_u): cmn r4, #(53 << 19) movle xl, #0 bicle xh, xh, #0x7fffffff
@ [extraction artifact] Code-viewer shortcut legend (translated; not part of
@ the original source file):
@   Copy code:       Ctrl + C
@   Search code:     Ctrl + F
@   Full screen:     F11
@   Increase font:   Ctrl + =
@   Decrease font:   Ctrl + -
@   Show shortcuts:  ?