📄 ppc_fdivs.s
字号:
/* fpopt/ppc_fdivs.S, pl_FPE_common, pl_linux 11/24/03 16:17:33 */
/*----------------------------------------------------------------------------- */
/* Copyright (c) 2003, IBM Corporation */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* * Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* * Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* * Neither the name of IBM nor the names of its contributors */
/* may be used to endorse or promote products derived from this */
/* software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */
/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */
/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */
/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */
/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */
/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* */
/*----------------------------------------------------------------------------- */
/* */
/* Function: divide two single floating point values. frt = fpa / fpb */
/* Input: r3(fpa), r4(fpb) */
/* Output: r3(frt) */
/* Notes: 1. No stack frame is created for the normal case of this function, */
/* so the following registers must be preserved, as required by */
/* ABI specification: */
/* LR, CR0, R1, R2, R13-R31 */
/* 2. For the full function case, a stack frame is created, following */
/* ABI specification, and non-volatile registers are saved and */
/* restored on the stack. */
/* 3. operation performed according to IEEE754-1985 standard with */
/* rounding mode = nearest even. */
/* */
/*----------------------------------------------------------------------------- */
#include <ppc4xx.inc>
#include "fpeLib.inc"
#define first_loop 4
function_prolog(__divsf3)
mfcr r0 /* save cr */
/* load fpa into r8,r9,r10 and cr6. load fpb into r11, r12, r13 and cr7 */
mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */
mr r12,r4 /* load fpb.exp, fpb.S, fpb.hi */
rlwinm r8,r9,9,0xff /* isolate exponent of fpa into r8 */
rlwinm r11,r12,9,0xff /* isolate exponent of fpb into r11 */
cmpwi cr6,r9,0 /* set fpa.sign */
cmpwi cr7,r12,0 /* set fpb.sign */
/* test for normal: */
cmpwi cr0,r8,0xff /* if (fpa.exp == SEXPMAX) */
cmpwi cr1,r8,0 /* if (fpa.exp == 0) */
cmpwi cr2,r11,0xff /* if (fpb.exp == SEXPMAX) */
cmpwi cr3,r11,0 /* if (fpb.exp == 0) */
beq full
beq cr1,full
beq cr2,full
beq cr3,full
/* normal case only: */
oris r9,r9,0x0080 /* fpa.hi |= 0x00800000; */
oris r12,r12,0x0080 /* fpb.hi |= 0x00800000; */
/* Calculate exponent */
subf r8,r11,r8 /* result.exp = fpa.exp - fpb.exp; */
/* ------- note: r11 is now free */
addic. r8,r8,+127 /* result.exp += SEXPBIAS; check for negative */
/* result.exp += 1 (for dividing left-just */
/* numbers if result is left-just) */
cmpwi cr2,r8,0xff /* if (result.exp) >= SEXPMAX */
ble full /* or less than minimum */
bge cr2,full
/* Zero out the non-number part of the operands: */
/* for the sign bit plus one working bit: */
rlwinm r9,r9,0,8,31 /* r9 = fpa */
rlwinm r12,r12,0,8,31 /* r12 = fpb */
/* Divide the numbers (25 bits of long division); */
/* r5 holds the intermediate differences. */
li r5,25 /* calculate 24 signif + 1 sticky */
mtctr r5
longdivloop:
/* r5 = r9 - r12: */
subfc. r5,r12,r9
adde r11,r11,r11 /* rot result left and put CA into LSb of result */
blt+ zerobit
/* if intermed diff positive then: */
adde r9,r5,r5 /* r9 gets r5 << 1 */
bdnz longdivloop
b done25
zerobit: /* else: */
adde r9,r9,r9 /* r9 gets r9 << 1 */
bdnz longdivloop
done25:
/* Since we divided one left-justified number by another left-justified */
/* number, there are two possibilities for the result: */
/* either (the msb is 1) or */
/* (the msb is 0 and the next bit is 1). If the msb is 0, then we need to */
/* calculate one more bit (to make 25 significant bits). */
/* NOTE: the msb is now in bit 7 (=32-25) of result=r11, because we have */
/* calculated 25 bits. */
rlwinm. r5,r11,0,7,7 /* test hi-bit = bit 7 of result */
bne+ highbit_is_1
addic. r8,r8,-1 /* shifted case: exponent is 1 less than */
/* non-shifted. */
beq full /* exponent=0 is special case */
addi r5,0,1
mtctr r5
b longdivloop
highbit_is_1:
/* We now have the first 25 bits of the result. Bits beyond 24 are only */
/* needed to determine rounding. If bit 25 (the last one just calculated) */
/* is 0, then we don't round. If it is 1, then we'll calculate up to 12 more */
/* bits, looking for a 1. The first time we see a 1, we know to round up. */
/* But even if all 12 are 0, we still don't know for sure to not round, */
/* so we punt to the full routine. */
rlwinm r5,r11,0,30,31
cmpwi r5,1
bne roundup
/* At this point the LSb is 0 and the sticky bit is 1; we need more bits */
/* to determine rounding -- if any subsequent bit is 1, then we round up. */
/* We'll try 12 more bits before giving up: */
addi r5,0,12
mtctr r5
seek1loop:
/* r5 = r9 - r12: */
subfc. r5,r12,r9
bge roundup
/* (result(this bit) = 0) */
adde r9,r9,r9 /* r9 gets r9 << 1 */
bdnz seek1loop
b full
roundup: /* adding 1 to the sticky bit performs standard */
addi r11,r11,1 /* rounding; i.e. if sticky=0 then other bits */
/* We're done! Just assemble the result back into IEEE format: */
/* We need to get the msb from bit 7 to bit 8 of the result, so shift 1 */
srwi r3,r11,1
/* Put the exponent into bits 1-8 of the MSW: */
rlwimi r3,r8,23,1,8
/* Calculate sign bit: */
crxor cr6_sign,cr6_sign,cr7_sign
bf cr6_sign,signbit0
oris r3,r3,0x8000 /* set sign bit */
signbit0:
mtcr r0 /* restore cr */
blr /* return */
/*---------------- end of normal-only case --------------- */
full:
mtcr r0 /* restore CR */
mflr r0 /* save link register in caller's stack */
stw r0,4(r1)
stwu r1,-STACKSIZE(r1) /* set up stack frame to save regs */
SAVEREG(19) /* save r19 */
mfcr r19 /* save cr */
/* load fpa into r8,r9,r10 and cr6. load fpb into r11, r12, r4 and cr7 */
mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */
mr r12,r4 /* load fpb.exp, fpb.S, fpb.hi */
rlwinm r8,r9,9,0xff /* isolate exponent of fpa */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -