📄 gcc-3.3.2-arm-softfloat.patch
字号:
+ @ NOTE(review): the line breaks below were reconstructed from a copy of this
+ @ patch hunk whose newlines had been stripped; the instruction stream itself
+ @ is unchanged.  This fragment contains the tail of the soft-float double
+ @ multiply (muldf3), all of divdf3, the cmpdf2 comparison family, unorddf2,
+ @ and the start of fixdfsi.  xl/xh and yl/yh are presumably register aliases
+ @ for the low/high words of the two double operands, defined earlier in the
+ @ file -- confirm against the full patch.
+ @
+ @ Entry state here (muldf3): r4/r5 hold the operand exponent fields, ip is
+ @ an exponent mask; the two lines below clear sign+exponent and set the
+ @ implicit 1 bit (0x00100000) of each 53-bit mantissa.
+ bic xh, xh, ip, lsl #1
+ bic yh, yh, ip, lsl #1
+ orr xh, xh, #0x00100000
+ orr yh, yh, #0x00100000
+
+#if __ARM_ARCH__ < 4
+
+ @ Well, no way to make it shorter without the umull instruction.
+ @ We must perform that 53 x 53 bit multiplication by hand.
+ @ (Operands are split into 16-bit halves and the partial products are
+ @ accumulated into r6:r5:lr:ip, most significant word in r6.)
+ stmfd sp!, {r7, r8, r9, sl, fp}
+ mov r7, xl, lsr #16
+ mov r8, yl, lsr #16
+ mov r9, xh, lsr #16
+ mov sl, yh, lsr #16
+ bic xl, xl, r7, lsl #16
+ bic yl, yl, r8, lsl #16
+ bic xh, xh, r9, lsl #16
+ bic yh, yh, sl, lsl #16
+ mul ip, xl, yl
+ mul fp, xl, r8
+ mov lr, #0
+ adds ip, ip, fp, lsl #16
+ adc lr, lr, fp, lsr #16
+ mul fp, r7, yl
+ adds ip, ip, fp, lsl #16
+ adc lr, lr, fp, lsr #16
+ mul fp, xl, sl
+ mov r5, #0
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, r7, yh
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, xh, r8
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, r9, yl
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, xh, sl
+ mul r6, r9, sl
+ adds r5, r5, fp, lsl #16
+ adc r6, r6, fp, lsr #16
+ mul fp, r9, yh
+ adds r5, r5, fp, lsl #16
+ adc r6, r6, fp, lsr #16
+ mul fp, xl, yh
+ adds lr, lr, fp
+ mul fp, r7, sl
+ adcs r5, r5, fp
+ mul fp, xh, yl
+ adc r6, r6, #0
+ adds lr, lr, fp
+ mul fp, r9, r8
+ adcs r5, r5, fp
+ mul fp, r7, r8
+ adc r6, r6, #0
+ adds lr, lr, fp
+ mul fp, xh, yh
+ adcs r5, r5, fp
+ adc r6, r6, #0
+ ldmfd sp!, {r7, r8, r9, sl, fp}
+
+#else
+
+ @ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits.
+ umull ip, lr, xl, yl
+ mov r5, #0
+ umlal lr, r5, xl, yh
+ umlal lr, r5, xh, yl
+ mov r6, #0
+ umlal r5, r6, xh, yh
+
+#endif
+
+ @ The LSBs in ip are only significant for the final rounding.
+ @ Fold them into one bit of lr.
+ teq ip, #0
+ orrne lr, lr, #1
+
+ @ Put final sign in xh.
+ mov xh, r4, lsl #16
+ bic r4, r4, #0x8000
+
+ @ Adjust result if one extra MSB appeared (one of four times).
+ tst r6, #(1 << 9)
+ beq 1f
+ add r4, r4, #(1 << 19)
+ movs r6, r6, lsr #1
+ movs r5, r5, rrx
+ movs lr, lr, rrx
+ orrcs lr, lr, #1
+1:
+ @ Scale back to 53 bits.
+ @ xh contains sign bit already.
+ orr xh, xh, r6, lsl #12
+ orr xh, xh, r5, lsr #20
+ mov xl, r5, lsl #12
+ orr xl, xl, lr, lsr #20
+
+ @ Apply exponent bias, check range for underflow.
+ sub r4, r4, #0x00f80000
+ subs r4, r4, #0x1f000000
+ ble LSYM(Lml_u)
+
+ @ Round the result.
+ @ (Round-to-nearest-even: the teq/biceq pair clears the LSB when the
+ @ discarded bits were exactly one half.)
+ movs lr, lr, lsl #12
+ bpl 1f
+ adds xl, xl, #1
+ adc xh, xh, #0
+ teq lr, #0x80000000
+ biceq xl, xl, #1
+
+ @ Rounding may have produced an extra MSB here.
+ @ The extra bit is cleared before merging the exponent below.
+ tst xh, #0x00200000
+ addne r4, r4, #(1 << 19)
+1:
+ @ Check exponent for overflow.
+ adds ip, r4, #(1 << 19)
+ tst ip, #(1 << 30)
+ bne LSYM(Lml_o)
+
+ @ Add final exponent.
+ bic xh, xh, #0x00300000
+ orr xh, xh, r4, lsl #1
+ RETLDM "r4, r5, r6"
+
+ @ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+ eor xh, xh, yh
+LSYM(Ldv_z):
+ bic xh, xh, #0x7fffffff
+ mov xl, #0
+ RETLDM "r4, r5, r6"
+
+ @ Check if denormalized result is possible, otherwise return signed 0.
+LSYM(Lml_u):
+ cmn r4, #(53 << 19)
+ movle xl, #0
+ bicle xh, xh, #0x7fffffff
+ RETLDM "r4, r5, r6" le
+
+ @ Find out proper shift value.
+LSYM(Lml_r):
+ mvn r4, r4, asr #19
+ subs r4, r4, #30
+ bge 2f
+ adds r4, r4, #12
+ bgt 1f
+
+ @ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
+ add r4, r4, #20
+ rsb r5, r4, #32
+ mov r3, xl, lsl r5
+ mov xl, xl, lsr r4
+ orr xl, xl, xh, lsl r5
+ movs xh, xh, lsl #1
+ mov xh, xh, lsr r4
+ mov xh, xh, rrx
+ adds xl, xl, r3, lsr #31
+ adc xh, xh, #0
+ teq lr, #0
+ teqeq r3, #0x80000000
+ biceq xl, xl, #1
+ RETLDM "r4, r5, r6"
+
+ @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
+ @ a register switch from xh to xl. Then round.
+1: rsb r4, r4, #12
+ rsb r5, r4, #32
+ mov r3, xl, lsl r4
+ mov xl, xl, lsr r5
+ orr xl, xl, xh, lsl r4
+ bic xh, xh, #0x7fffffff
+ adds xl, xl, r3, lsr #31
+ adc xh, xh, #0
+ teq lr, #0
+ teqeq r3, #0x80000000
+ biceq xl, xl, #1
+ RETLDM "r4, r5, r6"
+
+ @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
+ @ from xh to xl. Leftover bits are in r3-r6-lr for rounding.
+2: rsb r5, r4, #32
+ mov r6, xl, lsl r5
+ mov r3, xl, lsr r4
+ orr r3, r3, xh, lsl r5
+ mov xl, xh, lsr r4
+ bic xh, xh, #0x7fffffff
+ adds xl, xl, r3, lsr #31
+ adc xh, xh, #0
+ orrs r6, r6, lr
+ teqeq r3, #0x80000000
+ biceq xl, xl, #1
+ RETLDM "r4, r5, r6"
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+ @ NOTE(review): r4 appears to be pre-shifted right one bit here (exponent
+ @ unit 1 << 19) while r5 is still unshifted (unit 1 << 20), matching the
+ @ divdf3 setup below -- confirm against the muldf3 prologue above this hunk.
+LSYM(Lml_d):
+ mov lr, #0
+ teq r4, #0
+ bne 2f
+ and r6, xh, #0x80000000
+1: movs xl, xl, lsl #1
+ adc xh, lr, xh, lsl #1
+ tst xh, #0x00100000
+ subeq r4, r4, #(1 << 19)
+ beq 1b
+ orr xh, xh, r6
+ teq r5, #0
+ bne LSYM(Lml_x)
+2: and r6, yh, #0x80000000
+3: movs yl, yl, lsl #1
+ adc yh, lr, yh, lsl #1
+ tst yh, #0x00100000
+ subeq r5, r5, #(1 << 20)
+ beq 3b
+ orr yh, yh, r6
+ b LSYM(Lml_x)
+
+ @ One or both args are INF or NAN.
+LSYM(Lml_s):
+ orrs r6, xl, xh, lsl #1
+ orrnes r6, yl, yh, lsl #1
+ beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ teq r4, ip
+ bne 1f
+ orrs r6, xl, xh, lsl #12
+ bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+1: teq r5, ip
+ bne LSYM(Lml_i)
+ orrs r6, yl, yh, lsl #12
+ bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+
+ @ Result is INF, but we need to determine its sign.
+LSYM(Lml_i):
+ eor xh, xh, yh
+
+ @ Overflow: return INF (sign already in xh).
+LSYM(Lml_o):
+ and xh, xh, #0x80000000
+ orr xh, xh, #0x7f000000
+ orr xh, xh, #0x00f00000
+ mov xl, #0
+ RETLDM "r4, r5, r6"
+
+ @ Return NAN.
+LSYM(Lml_n):
+ mov xh, #0x7f000000
+ orr xh, xh, #0x00f80000
+ RETLDM "r4, r5, r6"
+
+ FUNC_END muldf3
+
+ARM_FUNC_START divdf3
+
+ stmfd sp!, {r4, r5, r6, lr}
+
+ @ Mask out exponents.
+ mov ip, #0x7f000000
+ orr ip, ip, #0x00f00000
+ and r4, xh, ip
+ and r5, yh, ip
+
+ @ Trap any INF/NAN or zeroes.
+ teq r4, ip
+ teqne r5, ip
+ orrnes r6, xl, xh, lsl #1
+ orrnes r6, yl, yh, lsl #1
+ beq LSYM(Ldv_s)
+
+ @ Shift exponents right one bit to make room for overflow bit.
+ @ If either of them is 0, scale denormalized arguments off line.
+ @ Then subtract divisor exponent from dividend's.
+ movs r4, r4, lsr #1
+ teqne r5, #0
+ beq LSYM(Ldv_d)
+LSYM(Ldv_x):
+ sub r4, r4, r5, asr #1
+
+ @ Preserve final sign into lr.
+ eor lr, xh, yh
+
+ @ Convert mantissa to unsigned integer.
+ @ Dividend -> r5-r6, divisor -> yh-yl.
+ mov r5, #0x10000000
+ mov yh, yh, lsl #12
+ orr yh, r5, yh, lsr #4
+ orr yh, yh, yl, lsr #24
+ movs yl, yl, lsl #8
+ mov xh, xh, lsl #12
+ teqeq yh, r5
+ beq LSYM(Ldv_1)
+ orr r5, r5, xh, lsr #4
+ orr r5, r5, xl, lsr #24
+ mov r6, xl, lsl #8
+
+ @ Initialize xh with final sign bit.
+ and xh, lr, #0x80000000
+
+ @ Ensure result will land to known bit position.
+ cmp r5, yh
+ cmpeq r6, yl
+ bcs 1f
+ sub r4, r4, #(1 << 19)
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+1:
+ @ Apply exponent bias, check range for over/underflow.
+ add r4, r4, #0x1f000000
+ add r4, r4, #0x00f80000
+ cmn r4, #(53 << 19)
+ ble LSYM(Ldv_z)
+ cmp r4, ip, lsr #1
+ bge LSYM(Lml_o)
+
+ @ Perform first subtraction to align result to a nibble.
+ subs r6, r6, yl
+ sbc r5, r5, yh
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ mov xl, #0x00100000
+ mov ip, #0x00080000
+
+ @ The actual division loop.
+ @ (Each pass produces four quotient bits -- one nibble per iteration, with
+ @ ip as the moving quotient-bit mask.)
+1: subs lr, r6, yl
+ sbcs lr, r5, yh
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #1
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #2
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #3
+
+ orrs lr, r5, r6
+ beq 2f
+ mov r5, r5, lsl #4
+ orr r5, r5, r6, lsr #28
+ mov r6, r6, lsl #4
+ mov yh, yh, lsl #3
+ orr yh, yh, yl, lsr #29
+ mov yl, yl, lsl #3
+ movs ip, ip, lsr #4
+ bne 1b
+
+ @ We are done with a word of the result.
+ @ Loop again for the low word if this pass was for the high word.
+ tst xh, #0x00100000
+ bne 3f
+ orr xh, xh, xl
+ mov xl, #0
+ mov ip, #0x80000000
+ b 1b
+2:
+ @ Be sure result starts in the high word.
+ tst xh, #0x00100000
+ orreq xh, xh, xl
+ moveq xl, #0
+3:
+ @ Check if denormalized result is needed.
+ cmp r4, #0
+ ble LSYM(Ldv_u)
+
+ @ Apply proper rounding.
+ subs ip, r5, yh
+ subeqs ip, r6, yl
+ adcs xl, xl, #0
+ adc xh, xh, #0
+ teq ip, #0
+ biceq xl, xl, #1
+
+ @ Add exponent to result.
+ bic xh, xh, #0x00100000
+ orr xh, xh, r4, lsl #1
+ RETLDM "r4, r5, r6"
+
+ @ Division by 0x1p*: shortcut a lot of code.
+LSYM(Ldv_1):
+ and lr, lr, #0x80000000
+ orr xh, lr, xh, lsr #12
+ add r4, r4, #0x1f000000
+ add r4, r4, #0x00f80000
+ cmp r4, ip, lsr #1
+ bge LSYM(Lml_o)
+ cmp r4, #0
+ orrgt xh, xh, r4, lsl #1
+ RETLDM "r4, r5, r6" gt
+
+ cmn r4, #(53 << 19)
+ ble LSYM(Ldv_z)
+ orr xh, xh, #0x00100000
+ mov lr, #0
+ b LSYM(Lml_r)
+
+ @ Result must be denormalized: put remainder in lr for
+ @ rounding considerations.
+LSYM(Ldv_u):
+ orr lr, r5, r6
+ b LSYM(Lml_r)
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+ @ (r4 was shifted right one bit above, so its exponent unit is 1 << 19;
+ @ r5 is still unshifted, unit 1 << 20 -- hence the different decrements.)
+LSYM(Ldv_d):
+ mov lr, #0
+ teq r4, #0
+ bne 2f
+ and r6, xh, #0x80000000
+1: movs xl, xl, lsl #1
+ adc xh, lr, xh, lsl #1
+ tst xh, #0x00100000
+ subeq r4, r4, #(1 << 19)
+ beq 1b
+ orr xh, xh, r6
+ teq r5, #0
+ bne LSYM(Ldv_x)
+2: and r6, yh, #0x80000000
+3: movs yl, yl, lsl #1
+ adc yh, lr, yh, lsl #1
+ tst yh, #0x00100000
+ subeq r5, r5, #(1 << 20)
+ beq 3b
+ orr yh, yh, r6
+ b LSYM(Ldv_x)
+
+ @ One or both arguments is either INF, NAN or zero.
+LSYM(Ldv_s):
+ teq r4, ip
+ teqeq r5, ip
+ beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
+ teq r4, ip
+ bne 1f
+ orrs r4, xl, xh, lsl #12
+ bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ b LSYM(Lml_i) @ INF / <anything> -> INF
+1: teq r5, ip
+ bne 2f
+ orrs r5, yl, yh, lsl #12
+ bne LSYM(Lml_n) @ <anything> / NAN -> NAN
+ b LSYM(Lml_z) @ <anything> / INF -> 0
+2: @ One or both arguments are 0.
+ orrs r4, xl, xh, lsl #1
+ bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ orrs r5, yl, yh, lsl #1
+ bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
+ b LSYM(Lml_n) @ 0 / 0 -> NAN
+
+ FUNC_END divdf3
+
+#endif /* L_muldivdf3 */
+
+#ifdef L_cmpdf2
+
+ @ Comparison entry points: ip selects the value returned for unordered
+ @ operands (NANs), then all variants share the code at label 1.
+FUNC_START gedf2
+ARM_FUNC_START gtdf2
+ mov ip, #-1
+ b 1f
+
+FUNC_START ledf2
+ARM_FUNC_START ltdf2
+ mov ip, #1
+ b 1f
+
+FUNC_START nedf2
+FUNC_START eqdf2
+ARM_FUNC_START cmpdf2
+ mov ip, #1 @ how should we specify unordered here?
+
+1: stmfd sp!, {r4, r5, lr}
+
+ @ Trap any INF/NAN first.
+ mov lr, #0x7f000000
+ orr lr, lr, #0x00f00000
+ and r4, xh, lr
+ and r5, yh, lr
+ teq r4, lr
+ teqne r5, lr
+ beq 3f
+
+ @ Test for equality.
+ @ Note that 0.0 is equal to -0.0.
+2: orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0
+ orreqs ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0
+ teqne xh, yh @ or xh == yh
+ teqeq xl, yl @ and xl == yl
+ moveq r0, #0 @ then equal.
+ RETLDM "r4, r5" eq
+
+ @ Check for sign difference.
+ teq xh, yh
+ movmi r0, xh, asr #31
+ orrmi r0, r0, #1
+ RETLDM "r4, r5" mi
+
+ @ Compare exponents.
+ cmp r4, r5
+
+ @ Compare mantissa if exponents are equal.
+ moveq xh, xh, lsl #12
+ cmpeq xh, yh, lsl #12
+ cmpeq xl, yl
+ movcs r0, yh, asr #31
+ mvncc r0, yh, asr #31
+ orr r0, r0, #1
+ RETLDM "r4, r5"
+
+ @ Look for a NAN.
+3: teq r4, lr
+ bne 4f
+ orrs xl, xl, xh, lsl #12
+ bne 5f @ x is NAN
+4: teq r5, lr
+ bne 2b
+ orrs yl, yl, yh, lsl #12
+ beq 2b @ y is not NAN
+5: mov r0, ip @ return unordered code from ip
+ RETLDM "r4, r5"
+
+ FUNC_END gedf2
+ FUNC_END gtdf2
+ FUNC_END ledf2
+ FUNC_END ltdf2
+ FUNC_END nedf2
+ FUNC_END eqdf2
+ FUNC_END cmpdf2
+
+#endif /* L_cmpdf2 */
+
+#ifdef L_unorddf2
+
+ @ Return 1 if either argument is a NAN, 0 otherwise.
+ARM_FUNC_START unorddf2
+ str lr, [sp, #-4]!
+ mov ip, #0x7f000000
+ orr ip, ip, #0x00f00000
+ and lr, xh, ip
+ teq lr, ip
+ bne 1f
+ orrs xl, xl, xh, lsl #12
+ bne 3f @ x is NAN
+1: and lr, yh, ip
+ teq lr, ip
+ bne 2f
+ orrs yl, yl, yh, lsl #12
+ bne 3f @ y is NAN
+2: mov r0, #0 @ arguments are ordered.
+ RETLDM
+
+3: mov r0, #1 @ arguments are unordered.
+ RETLDM
+
+ FUNC_END unorddf2
+
+#endif /* L_unorddf2 */
+
+#ifdef L_fixdfsi
+
+ @ NOTE(review): fixdfsi continues beyond this fragment; only its first two
+ @ instructions are visible here.
+ARM_FUNC_START fixdfsi
+ orrs ip, xl, xh, lsl #1
+ beq 1f @ value is 0.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -