📄 ppc_fadd.s

📁 powerpc 405 优化过的硬浮点库
💻 S
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* fpopt/ppc_fadd.S, pl_FPE_common, pl_linux 11/24/03 16:17:25                                                                  */
/*----------------------------------------------------------------------------- */
/*  Copyright (c) 2003, IBM Corporation                                         */
/*  All rights reserved.                                                        */
/*                                                                              */
/*  Redistribution and use in source and binary forms, with or                  */
/*  without modification, are permitted provided that the following             */
/*  conditions are met:                                                         */
/*                                                                              */
/*    * Redistributions of source code must retain the above                    */
/*      copyright notice, this list of conditions and the following             */
/*      disclaimer.                                                             */
/*    * Redistributions in binary form must reproduce the above                 */
/*      copyright notice, this list of conditions and the following             */
/*      disclaimer in the documentation and/or other materials                  */
/*      provided with the distribution.                                         */
/*    * Neither the name of IBM nor the names of its contributors               */
/*      may be used to endorse or promote products derived from this            */
/*      software without specific prior written permission.                     */
/*                                                                              */
/*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND                      */
/*  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,                 */
/*  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF                    */
/*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE                    */
/*  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS           */
/*  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,         */
/*  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,                    */
/*  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR          */
/*  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY         */
/*  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT                */
/*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE           */
/*  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.    */
/*                                                                              */
/*----------------------------------------------------------------------------- */
/*                                                                              */
/* Function: add two double floating point numbers. frt = fpa + fpb             */
/* Input:    r3,r4(fpa)                                                         */
/*           r5,r6(fpb)                                                         */
/* Output:   r3,r4(frt)                                                         */
/* Notes:   1. No stack frame is created for this function, so the following    */
/*             registers must be preserved, as required by ABI specification:   */
/*               LR, CR0, R1, R2, R13-R31                                       */
/*          2. operation performed according to IEEE754-1985 standard with      */
/*             rounding mode = nearest even.                                    */
/*          3. This file contains code common to both addition and subtraction. */
/*          4. Eventually, the smaller magnitude argument (based on the high    */
/*             word only) will be in <r9,r10>, and the larger in <r8, r7>.      */
/*             GPRs <r9-r12> are used as a 128-bit register for developing the  */
/*             sum of the fractions. The signs of the arguments are dumped      */
/*             into the CR, and logical operations are used to decide if the    */
/*             smaller argument should be subtracted.                           */
/*             CR4 is used to remember the sign of the "big" argument, and      */
/*             whether the signs of the args were different.                    */
/*                                                                              */
/*----------------------------------------------------------------------------- */

/* CR bit numbers used as operands of the cror/crxor instructions below.        */
/* Each CR field is 4 bits wide in the order lt, gt, eq, so; field n therefore  */
/* starts at bit 4*n.                                                           */
/*   cr4_lt : receives the sign of the larger-magnitude ("big") operand         */
/*   cr4_eq : set when the two operand signs differ                             */
/*   cr6_lt : sign of A (set by cmpwi cr6 on A(high))                           */
/*   cr7_lt : sign of B (set by cmpwi cr7 on B(high))                           */
#define cr4_lt 16 
#define cr4_eq 18 
#define cr6_lt 24 
#define cr7_lt 28 

#include <ppc4xx.inc>

/* Entry point for double addition: A in r3:r4, B in r5:r6.                     */
/* function_prolog is a macro defined outside this file (presumably in          */
/* ppc4xx.inc); NOTE(review): confirm what it emits besides the label.          */
function_prolog(__adddf3)

     mr        r7,r5                  /* r7 = B(high); the common code reads B(high) from r7, not r5 */
/*                                                                              */
/* from here on is common code                                                  */
/*                                                                              */
     .globl     _ppc_fadd_common
/*                                                                              */
/* Common entry.  Reads A from r3:r4, B(high) from r7 and B(low) from r6.       */
/*  1. Branches away if either operand has the all-ones exponent (INF/NaN).     */
/*  2. Otherwise orders the operands by their sign-stripped high words and      */
/*     unpacks them into the register layout expected at "ready".               */
/*                                                                              */
_ppc_fadd_common:
     mfcr      r0                      /* copy the whole CR into r0 ... */
     mtctr     r0                      /* ... and keep it in CTR while CR fields are used as scratch */
     mr        r8,r3                 /* r8 = A(high) */
     rlwinm    r0,r8,12,0x7FF        /* r0 = 11-bit exponent of A (stays live; A_small reads it) */
     cmpwi     cr0,r0,0x7FF          /* cr0_eq: exponent of A is all ones (INF/NaN) */
     rlwinm    r12,r7,12,0x7FF       /* r12 = 11-bit exponent of B */
     cmpwi     cr1,r12,0x7FF         /* cr1_eq: exponent of B is all ones (INF/NaN) */
     cmpwi     cr6,r8,0              /* CR6_lt == fra sign bit */
     beq       cr0,A_is_spec         /* A is INF/NaN */
     cmpwi     cr7,r7,0              /* CR7_lt == frb sign bit */
     beq       cr1,B_is_spec         /* B is INF/NaN */
/*                                                                              */
/* Neither A nor B is INF or NaN.  Signs are captured in CR6 & CR7. */
/*                                                                              */
     rlwinm    r8,r8,0,0x7FFFFFFF    /* clear the sign bit of A(high) */
     rlwinm    r7,r7,0,0x7FFFFFFF    /* clear the sign bit of B(high) */
     cmpw      cr0,r8,r7             /* compare sign-stripped A(high) with B(high) */
     blt       cr0,A_small           /* A operand is smaller */
/*                                                                              */
/* A(high) >= B(high): A is "big", B is "small".                                */
/*                                                                              */
     rlwinm    r9,r7,0,0x000FFFFF    /* r9 = high fraction of B (small) */
     mr        r10,r6                /* r10 = low fraction of B (small) */
     cror      cr4_lt,cr6_lt,cr6_lt  /* cr4_lt = sign of A (the big operand) */
     rlwinm    r6,r7,12,0x7FF        /* r6 = exp(B); B(low) was already copied to r10 */
     mr        r7,r4                 /* r7 = low fraction of A; last read of B(high) in r7 is above */
     crxor     cr4_eq,cr6_lt,cr7_lt  /* true iff signs differ */
     rlwinm    r5,r8,12,0x7FF        /* r5 = exp(A) */
     rlwinm    r8,r8,0,0x000FFFFF    /* r8 = high fraction of A (big) */
     b         ready                 /* jump to common code */
/*                                                                              */
A_small:
/* A(high) < B(high): B is "big", A is "small".                                 */
/* Produces the same register layout as the path that branches to "ready".      */
     rlwinm    r9,r8,0,0x000FFFFF    /* r9 = high fraction of A (small) */
     mr        r10,r4                /* r10 = low fraction of A (small) */
     cror      cr4_lt,cr7_lt,cr7_lt  /* cr4_lt = sign of B (the big operand) */
     rlwinm    r8,r7,0,0x000FFFFF    /* r8 = high fraction of B (big) */
     rlwinm    r5,r7,12,0x7FF        /* r5 = exp(B) */
     crxor     cr4_eq,cr6_lt,cr7_lt  /* true iff signs differ */
     mr        r7,r6                 /* r7 = low fraction of B; must come after the reads of B(high) in r7 */
     ori       r6,r0,0               /* r6 = exp(A), still held in r0 from the entry sequence */
/*                                                                              */
/* now things are set up                                                        */
/*  r5 = exp(big),  r8 = high frac(big),  r7  = low frac(big)             */
/*  r6 = exp(small), r9 = high frac(small), r10 = low frac(small)           */
/*                                                                              */
ready:
/* Insert the implicit leading 1 into both fractions, then undo it for any      */
/* operand whose exponent field is 0 (denorm or zero) and use exponent 1.       */
     cmpwi     cr0,r6,0              /* cr0_eq: exp(small) == 0 (denorm/zero) */
     cmpwi     cr1,r5,0              /* cr1_eq: exp(big) == 0 (denorm/zero) */
     oris      r8,r8,0x0010          /* set 0x00100000 in big, just above the 20-bit high fraction */
     oris      r9,r9,0x0010          /* same for small */
     bne       cr0,adone             /* small has a nonzero exponent: keep the hidden bit */
     xoris     r9,r9,0x0010          /* exponent is 0: toggle the hidden bit back off */
     addi      r6,r6,1               /* and use exponent 1 instead of 0 */
adone:     
     xor       r12,r12,r12           /* r12 = 0 */
     bne       cr1,bdone             /* big has a nonzero exponent: keep the hidden bit */
     xoris     r8,r8,0x0010          /* exponent is 0: toggle the hidden bit back off */
     addi      r5,r5,1               /* and use exponent 1 instead of 0 */
bdone:
/*                                                                              */
/* now pre-shift "small" so exponents match                             */
/* The small fraction in r9:r10 is shifted right by exp(big) - exp(small);      */
/* bits shifted out are collected in r11 and r12.                               */
/*                                                                              */
     subfc.    r0,r6,r5              /* r0 = exp(big) - exp(small); cr0 reflects the result */
     cmpwi     cr1,r0,54             /* check max pre-shift amount */
     cmpwi     cr2,r0,32             /* cr2_lt: shift is less than one full word */
     xor       r11,r11,r11           /* clear register (wasted if pre-alignment needed) */
     beq       cr0,aligned           /* exponents equal: nothing to shift */
     rlwinm.   r6,r0,0,0x1F          /* r6 = shift amount mod 32; cr0_eq if that is 0 */
     bgt       cr1,ret_big           /* difference > 54 - "big" is result */
     beq       cr0,check_32          /* shift is a multiple of 32: skip the partial-word shift */

     subfic    r12,r6,32             /* r12 = 32 - r6, the complementary shift amount */
     slw       r11,r10,r12           /* r11 = bits shifted out of r10 */
     srw       r10,r10,r6            /* shift low word right */
     slw       r12,r9,r12            /* bits shifted out of r9 ... */
     or        r10,r10,r12           /* ... become the top bits of r10 */
     srw       r9,r9,r6              /* shift high word right */

check_32:
     xor       r12,r12,r12           /* clear register (again) */
     blt       cr2,aligned           /* done unless the total shift is at least 32 bits */

     ori       r12,r11,0             /* move r11 to r12 */
     ori       r11,r10,0             /* move r10 to r11 */
     ori       r10,r9,0              /* move r9 to r10 */
     xor       r9,r9,r9              /* zero r9 */
/*                                                                              */
aligned:
/* Exponents now match.  Same signs: add the fractions and check where the      */
/* leading 1 of the sum landed.  Different signs: handled at do_sub.            */
     beq       cr4,do_sub            /* do subtract if signs different */
     addc      r10,r10,r7            /* add the low words, recording the carry */
     adde      r9,r9,r8              /* add the high words plus that carry */
/*                                                                              */
     cntlzw    r0,r9                 /* count leading zero bits of the high word */
     cmpwi     cr0,r0,11             /* 11 leading zeros: top set bit is 0x00100000, the hidden-bit position */
     beq       cr0,do_round          /* just right.  round to double */
     bgt       cr0,do_sign           /* unnormalized - must be zero or denorm */
/* Fewer than 11 leading zeros: the sum carried past the hidden-bit position;   */
/* execution falls through to right_1.                                          */
/*                                                                              */
right_1:
/* Shift r9:r10:r11 right by one bit and increment the exponent.  The bit       */
/* shifted out of r11 is ORed into r12 (presumably as a sticky bit for          */
/* rounding; the rounding code is not in this part of the file).                */
     rlwinm    r0,r11,0,0x1          /* r0 = low bit of r11, about to be shifted out */
     rlwinm    r11,r11,31,0x7FFFFFFF /* r11 >>= 1 */
     rlwimi    r11,r10,31,0x80000000 /* low bit of r10 becomes the top bit of r11 */
     rlwinm    r10,r10,31,0x7FFFFFFF /* r10 >>= 1 */
     rlwimi    r10,r9,31,0x80000000  /* low bit of r9 becomes the top bit of r10 */
     addi      r5,r5,1               /* bump exponent */
     cmpli     cr2,r5,0x7fe          /* unsigned compare of the new exponent with 0x7FE */
     srwi      r9,r9,1               /* r9 >>= 1 */
     or        r12,r12,r0            /* bag up last bit from r11 */
     bgt       cr2,out_of_range      /* final exp is > max */
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -