📄 ppc_fmuls.s
字号:
/* fpopt/ppc_fmuls.S, pl_FPE_common, pl_linux 11/24/03 16:17:35 */
/*----------------------------------------------------------------------------- */
/* Copyright (c) 2003, IBM Corporation */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* * Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* * Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* * Neither the name of IBM nor the names of its contributors */
/* may be used to endorse or promote products derived from this */
/* software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */
/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */
/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */
/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */
/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */
/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* */
/*----------------------------------------------------------------------------- */
/* */
/* Function: multiply two single floating point values (frt = fpa * fpb) */
/* Input: r3(fpa), r4(fpb) */
/* Output: r3(frt) */
/* Notes: 1. No stack frame is created for this function, so the following */
/* registers must be preserved, as required by ABI specification: */
/* LR, CR0, R1, R2, R13-R31 */
/* 2. operation performed according to IEEE754-1985 standard with */
/* rounding mode = nearest even. */
/* */
/*----------------------------------------------------------------------------- */
#include <ppc4xx.inc>
#include "fpeLib.inc"
function_prolog(__mulsf3)
mfcr r0 /* save cr */
/* isolate exponents */
rlwinm r8,r3,9,0xff /* isolate exponent of fpa into r8 */
rlwinm r11,r4,9,0xff /* isolate exponent of fpc into r11 */
/* test for normal: */
cmpwi cr0,r8,0xff /* if (fpa.exp == SEXPMAX) */
cmpwi cr1,r8,0 /* if (fpa.exp == 0) */
cmpwi cr2,r11,0xff /* if (fpb.exp == SEXPMAX) */
cmpwi cr3,r11,0 /* if (fpb.exp == 0) */
beq full /* fpa.exp is max */
beq cr2,full /* fpb.exp is max */
beq cr1,tst0a /* fpa.exp is 0 */
beq cr3,tst0b /* fpb.exp is 0 */
/* normal case only: add in implied one */
oris r9,r3,0x0080 /* fpa.hi |= 0x00800000; */
oris r12,r4,0x0080 /* fpb.hi |= 0x00800000; */
/* Calculate exponent */
add r8,r8,r11 /* result.expt = fpa.exp + fpb.exp; */
/* ------- note: r11 is now free */
addic. r8,r8,-126 /* result.exp -= SEXPBIAS */
/* result.exp += 1 (for multiplying left-just */
/* numbers if result is left-just) */
cmpwi cr2,r8,253 /* if (result.exp >= (SEXPMAX--2) : */
ble full
bge cr2,full
/* Calculate sign: */
xor r11,r3,r4 /* get product sign */
/* Multiply the numbers: */
/* Start by shifting everything left 8 bits, so that the MSW is full: */
rlwinm r5,r9,8,0,23 /* r5 = fpa */
rlwinm r6,r12,8,0,23 /* r6 = fpb */
/* Multiply a and b (upper half only): */
mulhwu. r5,r5,r6 /* r5 gets high word of result */
/* ------- note: r6 is now free */
/* There are two possibilities for the result: */
/* Either (bit 0 is 1) or (bit 0 is 0 and bit 1 is 1). */
/* If bit 0 is 0, then shift left 1 bit to normalize: */
blt msb_is_bit0
add r5,r5,r5 /* shift fraction (r5) left 1 to fill msb*/
addic. r8,r8,-1 /* shifted case: exponent is 1 less */
/* than non-shifted */
beq full /* if exponent out of range, needs full handling */
msb_is_bit0:
/* See if we need to round. (NOTE: the exact .5 case needs more precision */
/* than what we have calculated; in that case, go to the full handler.) */
/* 24 bits (bits 0-23 of the result) are significant. The sticky bit */
/* is bit 24. If it is 0, then no rounding. */
/* BUT we don't really know the sticky bit for sure yet -- the uncalculated */
/* low-order 16 bits of the 48-bit product could conceivably cause the low */
/* bits of our result to round up, changing the state of bit 24. */
/* The round-up is the result of a carry-out from the low 12 bits, */
/* so at most it adds 1. But we have already */
/* shifted left one bit, just above, so by now the carry-out could add */
/* as much as 2. The addition of a 2-bit number can */
/* propagate up to bit 24 ONLY if bits 25-30 are all ones: */
rlwinm. r10,r5,0,25,31
cmpwi cr2,r10,0x7e /* special case if bits 25-30 all 1's */
rlwinm r10,r5,0,23,24 /* test LSb and sticky bit */
bge cr2,full
cmpwi cr3,r10,0x80 /* special rounding if LSb=0 and sticky=1 */
addic r5,r5,0x80 /* standard rounding = add 1 to sticky */
bne+ cr3,std_done
beq full /* if LSb=0 and sticky=1 and 22-31 all */
/* zeros, then special case */
std_done:
mcrxr cr0 /* if carry out, increment exponent */
bf cr0_2,nocarry
addi r8,r8,1
nocarry:
/* We're done! Just assemble the result back into IEEE format: */
/* r3 = IEEE result */
/* Shift right 8 bits to put the MSb into bit 8, but */
/* mask it off since it's implied: */
rlwinm r3,r5,32-8,9,31
/* Put the exponent into bits 1-8 of the MSW: */
rlwimi r3,r8,23,1,8
/* Insert sign bit: */
rlwimi r3,r11,0,0,0
mtcr r0 /* restore cr */
blr /* return; */
tst0a:
rlwinm. r11,r3,0,1,31 /* FRA = +-0 ? */
beq return0
beq cr3,tst0b /* fpb.exp == 0 ? (held in cr3)*/
b full
/* a is 0: */
/* Calculate sign: */
return0:
xor r3,r3,r4 /* get product sign */
rlwinm r3,r3,0,0,0 /* *FRT = fpa.hi = 0+sign */
mtcr r0 /* restore cr */
blr /* return */
tst0b:
rlwinm. r11,r4,0,1,31 /* FRC = +-0 ? */
bne full
/* b is 0: */
b return0
/*---------------- end of normal-only case --------------- */
full:
.fmsful:
mtcr r0 /* restore CR */
mtctr r0 /* save cr in ctr */
/* load fpa into r8,r9 and cr6. load fpb into r11, r12 and cr7 */
mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */
mr r12,r4 /* load fpb.exp, fpb.S, fpb.hi */
rlwinm r8,r9,9,0xff /* isolate exponent of fpa */
rlwinm r11,r12,9,0xff /* isolate exponent of fpb */
cmpwi cr6,r9,0 /* set fpa.sign */
cmpwi cr7,r12,0 /* set fpb.sign */
rlwinm. r9,r9,0,0x007fffff /* isolate fpa.hi */
cror cr6_zero,cr0_2,cr0_2 /* fpa.zero = fpa.hi == 0 */
rlwinm. r12,r12,0,0x007fffff /* isolate fpb.hi */
cror cr7_zero,cr0_2,cr0_2 /* fpb.zero = fpa.hi == 0 */
cmpwi cr0,r8,0xff /* if (fpa.exp == SEXPMAX) */
crand cr6_inf,cr6_zero,cr0_2 /* fpa.inf=(fpa.exp==SEXPMAX && fpa==0) */
crandc cr6_NaN,cr0_2,cr6_zero /* fpa.NaN=(fpa.exp==SEXPMAX && fpa!=0) */
cmpwi cr0,r8,0 /* if (fpa.exp == 0) */
crand cr6_zero,cr6_zero,cr0_2 /* fpa.zero=(fpa.exp==0 && fpa==0) */
crandc cr0_2,cr0_2,cr6_zero /* if (fpa.exp==0 && fpa!=0) */
cmpwi cr1,r11,0xff /* if (fpb.exp == SEXPMAX) */
beq denormal_a /* { Add implied 1 to significand */
oris r9,r9,0x0080 /* fpa.hi |= 0x00800000; */
b adone /* } else { */
denormal_a:
addi r8,r8,1 /* fpa.exp++; */
adone: /* } */
crand cr7_inf,cr7_zero,cr1_2 /* fpb.inf=(fpb.exp==SEXPMAX && fpb==0) */
crandc cr7_NaN,cr1_2,cr7_zero /* fpb.NaN=(fpb.exp==SEXPMAX && fpb!=0) */
cmpwi cr0,r11,0 /* if (fpb.exp == 0) */
crand cr7_zero,cr7_zero,cr0_2 /* fpb.zero=(fpb.exp==0 && fpb==0) */
crandc cr0_2,cr0_2,cr7_zero /* if (fpb.exp==0 && fpb!=0) */
beq denormal_b /* { Add implied 1 to significand */
oris r12,r12,0x0080 /* fpb.hi |= 0x00800000; */
b bdone /* } else { */
denormal_b:
addi r11,r11,1 /* fpb.exp++; */
bdone: /* } */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -