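/* ppc_fadd.s  (page title and "Font size:" label, apparently left over from the code viewer this listing was copied from) */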
/* fpopt/ppc_fadd.S, pl_FPE_common, pl_linux 11/24/03 16:17:25 */
/*----------------------------------------------------------------------------- */
/* Copyright (c) 2003, IBM Corporation */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* * Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* * Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* * Neither the name of IBM nor the names of its contributors */
/* may be used to endorse or promote products derived from this */
/* software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */
/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */
/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */
/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */
/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */
/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* */
/*----------------------------------------------------------------------------- */
/* */
/* Function: add two double floating point numbers. frt = fpa + fpb */
/* Input: r3,r4(fpa) */
/* r5,r6(fpb) */
/* Output: r3,r4(frt) */
/* Notes: 1. No stack frame is created for this function, so the following */
/* registers must be preserved, as required by ABI specification: */
/* LR, CR0, R1, R2, R13-R31 */
/* 2. operation performed according to IEEE754-1985 standard with */
/* rounding mode = nearest even. */
/* 3. This file contains code common to both addition and subtraction. */
/* 4. Eventually, the smaller magnitude argument (based on the high */
/* word only) will be in <r9,r10>, and the larger in <r8, r7>. */
/* GPRs <r9-r12> are used as a 128-bit register for developing the */
/* sum of the fractions. The signs of the arguments are dumped */
/* into the CR, and logical operations are used to decide if the */
/* smaller argument should be subtracted. */
/* CR4 is used to remember the sign of the "big" argument, and */
/* whether the signs of the args were different. */
/* */
/*----------------------------------------------------------------------------- */
/* Condition-register bit numbers, used as operands of the CR-logical */
/* instructions (cror, crxor) in this file. */
/* CR bit index = 4 * (CR field number) + offset; offset 0 = LT, 2 = EQ. */
/* CR4[LT] = 4*4 + 0 : holds the sign of the "big" argument */
#define cr4_lt 16
/* CR4[EQ] = 4*4 + 2 : set when the signs of the two arguments differ */
#define cr4_eq 18
/* CR6[LT] = 4*6 + 0 : sign bit of A (set by cmpwi cr6,<A high>,0) */
#define cr6_lt 24
/* CR7[LT] = 4*7 + 0 : sign bit of B (set by cmpwi cr7,<B high>,0) */
#define cr7_lt 28
/* NOTE(review): ppc4xx.inc presumably supplies the function_prolog macro */
/* and the rN / crN register names used in this file - it is not visible here. */
#include <ppc4xx.inc>
/* */
/* __adddf3: double-precision add entry point. */
/* In: r3:r4 = A (high:low word), r5:r6 = B (high:low word). */
/* Moves B(high) into r7 and falls through into _ppc_fadd_common. */
/* NOTE(review): function_prolog is a macro defined outside this file. */
/* */
function_prolog(__adddf3)
mr r7,r5 /* r7 = B(high); the common code reads B(high) from r7 */
/* */
/* Common code for addition and subtraction (see Note 3 in the file header). */
/* On entry: r3 = A(high), r4 = A(low), r7 = B(high), r6 = B(low). */
/* NOTE(review): the subtraction entry point presumably arrives here with */
/* the sign of B already inverted in r7 - it is not visible in this section. */
/* */
.globl _ppc_fadd_common
_ppc_fadd_common:
mfcr r0 /* read the whole CR; CR2 and CR4 are used as scratch below */
mtctr r0 /* park the saved CR in CTR (no stack frame is created) */
mr r8,r3 /* r8 = A(high) */
rlwinm r0,r8,12,0x7FF /* r0 = 11-bit biased exponent of A */
cmpwi cr0,r0,0x7FF /* CR0_eq: exponent all ones, A is INF/NaN */
rlwinm r12,r7,12,0x7FF /* r12 = 11-bit biased exponent of B */
cmpwi cr1,r12,0x7FF /* CR1_eq: exponent all ones, B is INF/NaN */
cmpwi cr6,r8,0 /* CR6_lt == sign bit of A (high word negative as signed) */
beq cr0,A_is_spec /* A is INF/NaN (CR7 is not yet set on this path) */
cmpwi cr7,r7,0 /* CR7_lt == sign bit of B */
beq cr1,B_is_spec /* B is INF/NaN */
/* */
/* Neither A nor B is INF or NaN. Signs are captured in CR6 & CR7. */
/* Order the operands by magnitude of the high word only: */
/* "big" goes to r5 (exp), r8:r7 (fraction); "small" to r6, r9:r10. */
/* */
rlwinm r8,r8,0,0x7FFFFFFF /* r8 = A(high) with the sign bit cleared */
rlwinm r7,r7,0,0x7FFFFFFF /* r7 = B(high) with the sign bit cleared */
cmpw cr0,r8,r7 /* compare A(high) with B(high); both are non-negative now */
blt cr0,A_small /* A operand is smaller */
/* */
/* A is "big" (its high word is >= that of B); B is "small". */
/* */
rlwinm r9,r7,0,0x000FFFFF /* r9 = high fraction of B (small) */
mr r10,r6 /* r10 = low fraction of B (small) */
cror cr4_lt,cr6_lt,cr6_lt /* CR4_lt = CR6_lt: sign of result is sign of A */
rlwinm r6,r7,12,0x7FF /* r6 = exp(B); must precede the overwrite of r7 below */
mr r7,r4 /* r7 = low fraction of A (big) */
crxor cr4_eq,cr6_lt,cr7_lt /* CR4_eq = sign(A) xor sign(B): true iff signs differ */
rlwinm r5,r8,12,0x7FF /* r5 = exp(A); must precede the overwrite of r8 below */
rlwinm r8,r8,0,0x000FFFFF /* r8 = high fraction of A (big) */
b ready /* jump to common code */
/* */
/* B is "big"; A is "small". Same register layout as above. */
/* */
A_small:
rlwinm r9,r8,0,0x000FFFFF /* r9 = high fraction of A (small); read r8 before it is reused */
mr r10,r4 /* r10 = low fraction of A (small) */
cror cr4_lt,cr7_lt,cr7_lt /* CR4_lt = CR7_lt: sign of result is sign of B */
rlwinm r8,r7,0,0x000FFFFF /* r8 = high fraction of B (big) */
rlwinm r5,r7,12,0x7FF /* r5 = exp(B); must precede the overwrite of r7 below */
crxor cr4_eq,cr6_lt,cr7_lt /* CR4_eq = sign(A) xor sign(B): true iff signs differ */
mr r7,r6 /* r7 = low fraction of B (big) */
ori r6,r0,0 /* r6 = exp(A), still held in r0 from the INF/NaN check */
/* */
/* now things are set up */
/* r5 = exp(big), r8 = high frac(big), r7 = low frac(big) */
/* r6 = exp(small), r9 = high frac(small), r10 = low frac(small) */
/* CR4_lt = sign of big, CR4_eq = signs differ */
/* */
ready:
cmpwi cr0,r6,0 /* CR0_eq: small has a zero exponent field (denorm/zero) */
cmpwi cr1,r5,0 /* CR1_eq: big has a zero exponent field (denorm/zero) */
oris r8,r8,0x0010 /* materialize "hidden" bit (0x00100000) in big */
oris r9,r9,0x0010 /* materialize "hidden" bit (0x00100000) in small */
bne cr0,adone /* small has a nonzero exponent: keep the hidden bit */
xoris r9,r9,0x0010 /* small is denorm/zero: clear the bit just set */
addi r6,r6,1 /* and use exponent 1 instead of 0 */
adone:
xor r12,r12,r12 /* r12 = 0 (lowest word of the 128-bit value r9:r10:r11:r12) */
bne cr1,bdone /* big has a nonzero exponent: keep the hidden bit */
xoris r8,r8,0x0010 /* big is denorm/zero: clear the bit just set */
addi r5,r5,1 /* and use exponent 1 instead of 0 */
bdone:
/* */
/* now pre-shift "small" so exponents match */
/* The fraction of small is shifted right through r9:r10:r11:r12 by */
/* (exp(big) - exp(small)) bits: first by the count mod 32, then by a */
/* whole word if the count is 32 or more. */
/* */
subfc. r0,r6,r5 /* r0 = exp(big) - exp(small); CR0 set from the result */
cmpwi cr1,r0,54 /* CR1: shift count versus 54 (maximum handled pre-shift) */
cmpwi cr2,r0,32 /* CR2: shift count versus 32 (whole-word move needed?) */
xor r11,r11,r11 /* clear register (wasted if pre-alignment needed) */
beq cr0,aligned /* exponents equal: no pre-shift needed */
rlwinm. r6,r0,0,0x1F /* r6 = shift count mod 32; CR0 set from it */
bgt cr1,ret_big /* difference > 54 - "big" is result */
beq cr0,check_32 /* count is a multiple of 32: skip the bit shift */
subfic r12,r6,32 /* r12 = 32 - r6, the complementary shift amount */
slw r11,r10,r12 /* r11 = bits shifted out of the bottom of r10 */
srw r10,r10,r6 /* shift the low word right */
slw r12,r9,r12 /* r12 = bits shifted out of the bottom of r9 (temporary) */
or r10,r10,r12 /* ...merged into the top of r10 */
srw r9,r9,r6 /* shift the high word right */
check_32:
xor r12,r12,r12 /* clear register (again); it was used as a temporary above */
blt cr2,aligned /* total shift below 32: no whole-word move */
ori r12,r11,0 /* move r11 to r12 */
ori r11,r10,0 /* move r10 to r11 */
ori r10,r9,0 /* move r9 to r10 */
xor r9,r9,r9 /* zero r9 */
/* */
/* Exponents now match; add or subtract the fractions. */
/* */
aligned:
beq cr4,do_sub /* CR4_eq set: signs differ, so subtract instead */
addc r10,r10,r7 /* add the low words, producing a carry */
adde r9,r9,r8 /* add the high words plus that carry */
/* */
/* A normalized sum has its top set bit at the hidden-bit position */
/* (0x00100000 in r9), i.e. exactly 11 leading zero bits. */
/* */
cntlzw r0,r9 /* r0 = number of leading zero bits in the high word */
cmpwi cr0,r0,11 /* compare with the normalized position */
beq cr0,do_round /* just right. round to double */
bgt cr0,do_sign /* unnormalized - must be zero or denorm */
/* */
/* Fewer than 11 leading zeros: the add carried past the hidden bit. */
/* Shift r9:r10:r11 right by one bit and bump the exponent. */
/* */
right_1:
rlwinm r0,r11,0,0x1 /* r0 = bit about to be shifted out of r11 */
rlwinm r11,r11,31,0x7FFFFFFF /* right shift 1 */
rlwimi r11,r10,31,0x80000000 /* low bit of r10 becomes the top bit of r11 */
rlwinm r10,r10,31,0x7FFFFFFF /* right shift 1 */
rlwimi r10,r9,31,0x80000000 /* low bit of r9 becomes the top bit of r10 */
addi r5,r5,1 /* bump exponent */
cmpli cr2,r5,0x7fe /* unsigned compare with 0x7FE (0x7FF is the INF/NaN exponent) */
srwi r9,r9,1 /* right shift the high word by 1 */
or r12,r12,r0 /* bag up last bit from r11 (presumably as sticky information for rounding - TODO confirm) */
bgt cr2,out_of_range /* final exp is > max */
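/* NOTE(review): the assembly listing is truncated above; the lines that */
/* followed here are keyboard-shortcut help text, apparently from the code */
/* viewer this listing was copied from, and are not part of the program: */
/* Keyboard shortcuts */
/* Copy code: Ctrl + C */
/* Search code: Ctrl + F */
/* Full-screen mode: F11 */
/* Toggle theme: Ctrl + Shift + D */
/* Show shortcuts: ? */
/* Increase font size: Ctrl + = */
/* Decrease font size: Ctrl + - */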