📄 ppc_fdiv.s
字号:
/* fpopt/ppc_fdiv.S, pl_FPE_common, pl_linux 11/24/03 16:17:31 */
/*----------------------------------------------------------------------------- */
/* Copyright (c) 2003, IBM Corporation */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* * Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* * Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* * Neither the name of IBM nor the names of its contributors */
/* may be used to endorse or promote products derived from this */
/* software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */
/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */
/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */
/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */
/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */
/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* */
/*----------------------------------------------------------------------------- */
/* */
/* Function: divide two double floating point values. frt = fpa / fpb */
/* Input: r3,r4(fpa) */
/* r5,r6(fpb) */
/* Output: r3,r4(frt) */
/* Notes: 1. A stack frame is created, following ABI specification, and */
/* non-volatile registers are saved and restored on the stack. */
/* 2. operation performed according to IEEE754-1985 standard with */
/* rounding mode = nearest even. */
/* */
/*----------------------------------------------------------------------------- */
#include <ppc4xx.inc>
#include "fpeLib.inc"
#define first_loop 4
function_prolog(__divdf3)
mflr r0 /* save link register in caller's stack */
stw r0,4(r1)
stwu r1,-STACKSIZE(r1) /* set up stack frame to hold saved regs */
SAVEREG(30)
SAVEREG(31)
mfcr r0 /* save cr */
/* load fpa into r8,r9,r10 and cr6. load fpb into r11, r12, r13 and cr7 */
mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */
mr r12,r5 /* load fpb.exp, fpb.S, fpb.hi */
rlwinm r8,r9,32-20,0x7ff /* isolate exponent of fpa into r8 */
rlwinm r11,r12,32-20,0x7ff /* isolate exponent of fpb into r11 */
mr r10,r4 /* load fpa.lo */
cmpwi cr6,r9,0 /* set fpa.sign */
/* load fpb.lo (already in r6)*/
cmpwi cr7,r12,0 /* set fpb.sign */
/* test for normal: */
cmpwi cr0,r8,0x7ff /* if (fpa.exp == DEXPMAX) */
cmpwi cr1,r8,0 /* if (fpa.exp == 0) */
cmpwi cr2,r11,0x7ff /* if (fpb.exp == DEXPMAX) */
cmpwi cr3,r11,0 /* if (fpb.exp == 0) */
beq full
beq cr1,full
beq cr2,full
beq cr3,full
/* normal case only: */
oris r9,r9,0x0010 /* fpa.hi |= 0x00100000; */
oris r12,r12,0x0010 /* fpb.hi |= 0x00100000; */
/* Calculate exponent */
subf r8,r11,r8 /* result.exp = fpa.exp - fpb.exp; */
/* ------- note: r11 is now free */
addic. r8,r8,+1023 /* result.exp += DEXPBIAS; check for negative */
/* result.exp += 1 (for dividing left-just */
/* numbers if result is left-just) */
cmpwi cr2,r8,0x7ff /* if (result.exp >= DEXPMAX) */
ble full
bge cr2, full /* cr2 */
/* Zero out the non-number part of the operands: */
/* for the sign bit plus one working bit: */
rlwinm r9,r9,0,11,31 /* r9 = fpa.hi */
/* r10 = fpa.lo */
rlwinm r12,r12,0,11,31 /* r12 = fpb.hi */
/* r6 = fpb.lo */
/* Divide the numbers (54 bits of long division); */
/* r7/r11 hold the intermediate differences. */
li r7,54 /* calculate 53 signif + 1 sticky */
mtctr r7
longdivloop:
/* r7/r11 = r9/r10 - r12/r6: */
subfc r11,r6,r10
subfe. r7,r12,r9
adde r31,r31,r31 /* rot result left and put CA into LSb of result */
adde r30,r30,r30 /* rotate r30/r31 left 1 bit */
blt+ zerobit
/* if intermed diff positive then: */
addc r10,r11,r11 /* r9/r10 get r7/r11 << 1 */
adde r9,r7,r7 /* */
bdnz longdivloop
b done54
zerobit: /* else: */
addc r10,r10,r10 /* r9/r10 get r9/r10 << 1 */
adde r9,r9,r9 /* */
bdnz longdivloop
done54:
/* Since we divided one left-justified number by another left-justified */
/* number, there are two possibilities for the result: */
/* either (the msb is 1) or */
/* (the msb is 0 and the next bit is 1). If the msb is 0, then we need to */
/* calculate one more bit (to make 54 significant bits). */
/* NOTE: the msb is now in bit 10 (=64-54) of result.hi=r30, because we have */
/* calculated 54 bits. */
rlwinm. r7,r30,0,10,10 /* test hi-bit = bit 10 of result.hi */
bne+ highbit_is_1
addic. r8,r8,-1 /* shifted case: exponent is 1 less than */
/* non-shifted. */
beq full /* exponent=0 is special case */
addi r7,0,1
mtctr r7
b longdivloop
highbit_is_1:
/* We now have the first 54 bits of the result. Bits beyond 53 are only */
/* needed to determine rounding. If bit 54 (the last one just calculated) */
/* is 0, then we don't round. If it is 1, then we'll calculate up to 12 more */
/* bits, looking for a 1. The first time we see a 1, we know to round up. */
/* But even if all 12 are 0, we still don't know for sure to not round, */
/* so we punt to the full routine. */
rlwinm r7,r31,0,30,31
cmpwi r7,1
bne roundup
/* At this point the LSb is 0 and the sticky bit is 1; we need more bits */
/* to determine rounding -- if any subsequent bit is 1, then we round up. */
/* We'll try 12 more bits before giving up: */
addi r7,0,12
mtctr r7
seek1loop:
/* r7/r11 = r9/r10 - r12/r6: */
subfc r11,r6,r10
subfe. r7,r12,r9
bge roundup
/* (result(this bit) = 0) */
addc r10,r10,r10 /* r9/r10 get r9/r10 << 1 */
adde r9,r9,r9 /* */
bdnz seek1loop
b full
roundup: /* adding 1 to the sticky bit performs standard */
addic r31,r31,1 /* rounding; i.e. if sticky=0 then other bits */
addze r30,r30 /* are not affected */
/* We're done! Just assemble the result back into IEEE format: */
/* r30 = IEEE MSW, r31 = IEEE LSW */
/* We need to get the msb from bit 10 to bit 11 of result.hi; so shift */
/* right 1 bit: */
rlwinm r31,r31,31,1,31 /* result >> 1 */
rlwimi r31,r30,31,0,0 /* */
rlwinm r30,r30,31,12,31 /* */
/* Put the exponent into bits 1-11 of the MSW: */
rlwimi r30,r8,20,1,11
/* Calculate sign bit: */
crxor cr6_sign,cr6_sign,cr7_sign
bf cr6_sign,signbit0
oris r30,r30,0x8000 /* set sign bit */
signbit0:
mr r3,r30 /* *FRT = fpa.hi; */
mr r4,r31 /* *FRT = fpa.lo; */
RESTREG(30)
RESTREG(31)
mtcr r0 /* restore cr */
return_common:
lwz r1,0(r1) /* previous stack frame */
lwz r0,4(r1) /* saved link register */
mtlr r0 /* restore link register */
blr /* return */
/*---------------- end of normal-only case --------------- */
full:
RESTREG(30)
RESTREG(31)
mtcr r0 /* restore CR */
SAVEREG(16) /* save r16 */
mfcr r16 /* save cr */
/* load fpa into r8,r9,r10 and cr6. load fpb into r11, r12, r6 and cr7 */
mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */
mr r12,r5 /* load fpb.exp, fpb.S, fpb.hi */
rlwinm r8,r9,32-20,0x7ff /* isolate exponent of fpa */
rlwinm r11,r12,32-20,0x7ff /* isolate exponent of fpb */
mr r10,r4 /* load fpa.lo */
cmpwi cr6,r9,0 /* set fpa.sign */
/* load fpb.lo (already in r6)*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -