/* File: ppc_fadds.s */
/* (code-viewer page residue, not part of the source: "Font size:" control label) */
/* fpopt/ppc_fadds.S, pl_FPE_common, pl_linux 11/24/03 16:17:26 */
/*----------------------------------------------------------------------------- */
/* Copyright (c) 2003, IBM Corporation */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* * Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* * Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* * Neither the name of IBM nor the names of its contributors */
/* may be used to endorse or promote products derived from this */
/* software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */
/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */
/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */
/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */
/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */
/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* */
/*----------------------------------------------------------------------------- */
/* */
/* Function: add two single floating point numbers. frt = fpa + fpb */
/* Input: r3(fpa), r4(fpb) */
/* Output: r3(frt) */
/* Notes: 1. No stack frame is created for this function, so the following */
/* registers must be preserved, as required by ABI specification: */
/* LR, CR0, R1, R2, R13-R31 */
/* 2. operation performed according to IEEE754-1985 standard with */
/* rounding mode = nearest even. */
/* 3. This file contains code common to both addition and subtraction. */
/* */
/*----------------------------------------------------------------------------- */
#include <ppc4xx.inc>
#include "fpeLib.inc"
/* */
#define SEXPMAX 255 /* Max value for single precision exponent */
#define SEXPBIAS 127 /* Bias value for single precision exponent */
/*----------------------------------------------------------------------------- */
/* __addsf3 / _ppc_fadds_common: unpack two IEEE754 single operands, align */
/* their significands, add them as signed 64-bit (hi:lo) values and begin */
/* normalizing the sum. */
/* */
/* Register usage in the code below: */
/* r0 = copy of CR taken on entry (mfcr) */
/* r3, r4 = raw operand words fpa, fpb on entry */
/* r8 = fpa.exp, later the exponent of the result */
/* r9:r10 = fpa.hi:fpa.lo (significand : bits shifted out) */
/* r11 = fpb.exp, later the alignment shift count */
/* r12:r4 = fpb.hi:fpb.lo (significand : bits shifted out) */
/* r5, r6 = scratch */
/* cr2 / cr3 = result of comparing fpa.exp / fpb.exp with SEXPMAX */
/* cr6 / cr7 = sign and zero flags of fpa / fpb */
/* */
/* The cr6_xxx, cr7_xxx and crN_M bit names and the function_prolog macro are */
/* not defined in this file; presumably they come from the included headers */
/* (TODO confirm). */
/* */
/* The labels a_NaNorINF, b_NaNorINF, a_zero, b_zero and noshrght are branch */
/* targets that are not defined in the visible part of this listing, and no */
/* return instruction is visible either. */
/*----------------------------------------------------------------------------- */
function_prolog(__addsf3)
.globl _ppc_fadds_common
/* save cr in r0 */
mfcr r0
/* load fpa into r8,r9,r10 and cr6. load fpb into r11, r12, r4 and cr7 */
mr r9,r3 /* r9 = raw fpa word (sign, exponent, fraction) */
mr r12,r4 /* r12 = raw fpb word (sign, exponent, fraction) */
rlwinm r8,r9,9,0xff /* r8 = fpa.exp: rotate left 9, keep low 8 bits */
/* Secondary entry point. Code entering here bypasses the four instructions */
/* above, so it must arrive with r9, r12 and r8 already set up the same way */
/* (and presumably with CR already copied to r0 -- confirm against the caller). */
_ppc_fadds_common: /* common routine for fadds and fsubs */
rlwinm r11,r12,9,0xff /* r11 = fpb.exp: rotate left 9, keep low 8 bits */
cmpwi cr6,r9,0 /* cr6 = (raw fpa word compared with 0): records fpa.sign */
cmpwi cr7,r12,0 /* cr7 = (raw fpb word compared with 0): records fpb.sign */
rlwinm. r9,r9,0,0x007fffff /* fpa.hi = 23-bit fraction of fpa; cr0 = result vs 0 */
cror cr6_zero,cr0_2,cr0_2 /* fpa.zero = (fpa.hi == 0), copied from cr0 EQ */
rlwinm. r12,r12,0,0x007fffff /* fpb.hi = 23-bit fraction of fpb; cr0 = result vs 0 */
cror cr7_zero,cr0_2,cr0_2 /* fpb.zero = (fpb.hi == 0), copied from cr0 EQ */
cmpwi cr2,r8,SEXPMAX /* cr2 EQ = (fpa.exp == SEXPMAX) */
cmpwi cr0,r8,0 /* cr0 EQ = (fpa.exp == 0) */
crand cr6_zero,cr6_zero,cr0_2 /* fpa.zero = (fpa.exp==0 && fpa.hi==0) */
crandc cr0_2,cr0_2,cr6_zero /* cr0 EQ = (fpa.exp==0 && fpa.hi!=0): fpa is denormal */
cmpwi cr3,r11,SEXPMAX /* cr3 EQ = (fpb.exp == SEXPMAX) */
beq denormal_a /* if fpa is denormal, skip the implicit 1 */
oris r9,r9,0x0080 /* not denormal: fpa.hi |= 0x00800000 (implicit 1) */
b adone /* } else { */
denormal_a:
addi r8,r8,1 /* denormal: fpa.exp++ (exponent 0 becomes 1) */
adone: /* } */
/* NOTE(review): the next two instructions read cr1_2, but the */
/* fpb.exp == SEXPMAX compare above wrote cr3 and nothing in the visible code */
/* sets cr1 before this point; cr3_2 looks intended. Confirm against the */
/* NaN/INF handlers, which are not visible in this listing. */
crand cr7_inf,cr7_zero,cr1_2 /* fpb.inf = (fpb.hi==0 && cr1 EQ); meant: fpb.exp==SEXPMAX */
crandc cr7_NaN,cr1_2,cr7_zero /* fpb.NaN = (cr1 EQ && fpb.hi!=0); meant: fpb.exp==SEXPMAX */
cmpwi cr0,r11,0 /* cr0 EQ = (fpb.exp == 0) */
crand cr7_zero,cr7_zero,cr0_2 /* fpb.zero = (fpb.exp==0 && fpb.hi==0) */
crandc cr0_2,cr0_2,cr7_zero /* cr0 EQ = (fpb.exp==0 && fpb.hi!=0): fpb is denormal */
beq denormal_b /* if fpb is denormal, skip the implicit 1 */
oris r12,r12,0x0080 /* not denormal: fpb.hi |= 0x00800000 (implicit 1) */
b bdone /* } else { */
denormal_b:
addi r11,r11,1 /* denormal: fpb.exp++ (exponent 0 becomes 1) */
bdone: /* } */
/* check for Not-A-Number or INFinity */
bt cr2_2,a_NaNorINF /* if (fpa.exp == SEXPMAX) goto a_NaNorINF; */
bt cr3_2,b_NaNorINF /* if (fpb.exp == SEXPMAX) goto b_NaNorINF; */
/* check for a or b zero - if so, done (handlers not visible in this listing) */
bt cr6_zero,a_zero /* if (fpa.zero) goto a_zero; presumably returns fpb */
bt cr7_zero,b_zero /* if (fpb.zero) goto b_zero; presumably returns fpa */
/* Convert sign-magnitude significands to two's complement before adding. */
/* if (fpa < 0) fpa = -fpa; */
bf+ cr6_sign,a_positive /* if (fpa < 0) { */
subfic r9,r9,0 /* fpa.hi = 0 - fpa.hi; */
a_positive: /* } */
/* if (fpb < 0) fpb = -fpb; */
bf+ cr7_sign,b_positive /* if (fpb < 0) { */
/* NOTE(review): the second operand below is a bare 12 where r12 is used */
/* everywhere else; presumably the assembler treats it as GPR 12, making */
/* this fpb.hi = 0 - fpb.hi -- confirm with the register-name macros. */
subfic r12,12,0 /* fpb = -fpb; */
b_positive: /* } */
cmpw cr0,r11,r8 /* compare fpb.exp with fpa.exp */
li r10,0 /* fpa.lo = 0 */
li r4,0 /* fpb.lo = 0 */
bgt shifta /* if (fpb.exp > fpa.exp) align fpa instead */
/* fpa.exp >= fpb.exp: r8 already holds the larger exponent. */
/* fpb.significand >>= (fpa.exp - fpb.exp); */
subf. r11,r11,r8 /* shift = fpa.exp - fpb.exp; cr0 = shift vs 0 */
cmpwi cr1,r11,27 /* cr1 = shift compared with 27 */
beq do_add /* if (shift == 0) goto do_add; */
ble+ cr1,shiftless27 /* if (shift > 27) { */
/* fpb is shifted out entirely: replace it by a signed value of the */
/* smallest magnitude so that it still counts as non-zero */
bt cr7_sign,add_minus1 /* if (b > 0) { add in 1 */
li r12,0 /* fpb.hi = 0; */
li r4,1 /* fpb.lo = 1; */
b do_add /* } */
add_minus1: /* else { b < 0 : add in -1 */
li r12,-1 /* fpb.hi = -1; */
li r4,-1 /* fpb.lo = -1; (hi:lo = 64-bit -1) */
b do_add /* } */
/* } */
shiftless27: /* else (0 < shift <= 27) */
/* { */
subfic r6,r11,32 /* r6 = 32-shift; */
slw r4,r12,r6 /* fpb.lo = bits shifted out of fpb.hi */
sraw r12,r12,r11 /* (signed)fpb.hi >>= shift; */
b do_add /* } */
shifta: /* else { fpb.exp > fpa.exp */
/* fpa.significand >>= (fpb.exp - fpa.exp); */
mr r6,r11 /* r6 = fpb.exp, the larger exponent */
subf. r11,r8,r11 /* shift = fpb.exp - fpa.exp; */
mr r8,r6 /* result exponent = fpb.exp */
cmpwi cr1,r11,27 /* cr1 = shift compared with 27 */
ble+ cr1,Ashiftless27 /* if (shift > 27) { */
/* fpa is shifted out entirely: replace it by a signed value of the */
/* smallest magnitude so that it still counts as non-zero */
bt cr6_sign,Aadd_minus1 /* if (a > 0) { add in 1 */
li r9,0 /* fpa.hi = 0; */
li r10,1 /* fpa.lo = 1; */
b do_add /* } */
Aadd_minus1: /* else { a < 0 : add in -1 */
li r9,-1 /* fpa.hi = -1; */
li r10,-1 /* fpa.lo = -1; (hi:lo = 64-bit -1) */
b do_add /* } */
/* } */
Ashiftless27: /* else (0 < shift <= 27) */
/* { */
subfic r6,r11,32 /* r6 = 32-shift; */
slw r10,r9,r6 /* fpa.lo = bits shifted out of fpa.hi */
sraw r9,r9,r11 /* (signed)fpa.hi >>= shift; */
do_add: /* } */
/* 64-bit signed add: fpa.significand += fpb.significand; */
addc r10,r10,r4 /* fpa.lo += fpb.lo; carry out in CA */
adde. r9,r9,r12 /* fpa.hi += fpb.hi + CA; cr0 = fpa.hi vs 0 */
cror cr6_sign,cr0_0,cr0_0 /* fpa.sign = (fpa.hi < 0), copied from cr0 LT */
/* Back to sign-magnitude: if (fpa < 0) fpa = -fpa; */
bge pos_result /* if (fpa < 0) { */
subfic r10,r10,0 /* fpa.lo = 0 - fpa.lo; CA carries into the high word */
subfze r9,r9 /* fpa.hi = ~fpa.hi + CA; completes the 64-bit negate */
pos_result: /* } */
/* normalize fpa */
cntlzw r5,r9 /* s = cntlz(fpa.hi); */
cmpwi cr0,r5,8 /* if (s < 8 ) */
/* Note that first 8 bits of word */
/* were 0 (where exponent and sign */
/* were), so s can really only have */
/* a min value of 7 . */
bge noshrght /* { else (s >= 8) goto noshrght (not visible here) */
subfic r5,r5,8 /* r5 = 8-s; */
subfic r11,r5,32 /* r11 = 32-(8-s) = 24+s; */
srw r10,r10,r5 /* fpa.lo >>= (8-s); */
slw r6,r9,r11 /* temp = fpa.hi << (24+s): bits about to leave fpa.hi */
srw r9,r9,r5 /* fpa.hi >>= (8-s); */
add r8,r5,r8 /* fpa.exp += (8-s); */
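/* ----------------------------------------------------------------------------
 * NOTE: this copy of the source ends here. The branch targets a_NaNorINF,
 * b_NaNorINF, a_zero, b_zero and noshrght referenced above are not defined
 * in this listing.
 *
 * The text that followed was code-viewer page residue, not part of the
 * source (keyboard shortcut help):
 *   Copy code:            Ctrl + C
 *   Search code:          Ctrl + F
 *   Full-screen mode:     F11
 *   Toggle theme:         Ctrl + Shift + D
 *   Show shortcuts:       ?
 *   Increase font size:   Ctrl + =
 *   Decrease font size:   Ctrl + -
 * ---------------------------------------------------------------------------- */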