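/* Listing: ppc_fadds.s                                                         */
/* Project: PowerPC 405 optimized hardware floating-point library               */
/* Type: S (assembly); page 1 of 2 of the original web listing                  */
/* (web viewer controls: font size; pages 1, 2; next page)                      */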
/* fpopt/ppc_fadds.S, pl_FPE_common, pl_linux 11/24/03 16:17:26                                                                  */
/*----------------------------------------------------------------------------- */
/*  Copyright (c) 2003, IBM Corporation                                         */
/*  All rights reserved.                                                        */
/*                                                                              */
/*  Redistribution and use in source and binary forms, with or                  */
/*  without modification, are permitted provided that the following             */
/*  conditions are met:                                                         */
/*                                                                              */
/*    * Redistributions of source code must retain the above                    */
/*      copyright notice, this list of conditions and the following             */
/*      disclaimer.                                                             */
/*    * Redistributions in binary form must reproduce the above                 */
/*      copyright notice, this list of conditions and the following             */
/*      disclaimer in the documentation and/or other materials                  */
/*      provided with the distribution.                                         */
/*    * Neither the name of IBM nor the names of its contributors               */
/*      may be used to endorse or promote products derived from this            */
/*      software without specific prior written permission.                     */
/*                                                                              */
/*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND                      */
/*  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,                 */
/*  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF                    */
/*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE                    */
/*  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS           */
/*  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,         */
/*  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,                    */
/*  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR          */
/*  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY         */
/*  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT                */
/*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE           */
/*  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.    */
/*                                                                              */
/*----------------------------------------------------------------------------- */
/*                                                                              */
/* Function: add two single floating point numbers. frt = fpa + fpb             */
/* Input:    r3(fpa), r4(fpb)                                                   */
/* Output:   r3(frt)                                                            */
/* Notes:   1. No stack frame is created for this function, so the following    */
/*             registers must be preserved, as required by ABI specification:   */
/*               LR, CR0, R1, R2, R13-R31                                       */
/*          2. operation performed according to IEEE754-1985 standard with      */
/*             rounding mode = nearest even.                                    */
/*          3. This file contains code common to both addition and subtraction. */
/*                                                                              */
/*----------------------------------------------------------------------------- */

#include <ppc4xx.inc>
#include "fpeLib.inc"

/*                                                                              */
#define SEXPMAX  255  /* Max value for single precision exponent */
#define SEXPBIAS 127  /* Bias value for single precision exponent */

function_prolog(__addsf3)

.globl  _ppc_fadds_common

/* __addsf3 entry: unpack fpa (r3) and fpb (r4), then fall into the code that   */
/* is shared between addition and subtraction.                                  */
/* save cr in r0                                                                */
        mfcr    r0
/* load fpa into r8,r9,r10 and cr6.  load fpb into r11, r12, r4 and cr7         */
        mr      r9,r3                   /* load fpa.S, fpa.exp, fpa.hi */
        mr      r12,r4                  /* load fpb.S, fpb.exp, fpb.hi */
        rlwinm  r8,r9,9,0xff            /* isolate exponent of fpa */

/* Shared entry point (exported by the .globl above).  The code below reads     */
/* r9 = fpa word, r12 = fpb word and r8 = fpa.exp.                              */
_ppc_fadds_common:                      /* common routine for fadds and fsubs */
        rlwinm  r11,r12,9,0xff          /* isolate exponent of fpb */
        cmpwi   cr6,r9,0                /* set fpa.sign */
        cmpwi   cr7,r12,0               /* set fpb.sign */
        rlwinm. r9,r9,0,0x007fffff      /* isolate fpa.hi (23-bit fraction) */
        cror    cr6_zero,cr0_2,cr0_2    /* fpa.zero = (fpa.hi == 0) */
        rlwinm. r12,r12,0,0x007fffff    /* isolate fpb.hi (23-bit fraction) */
        cror    cr7_zero,cr0_2,cr0_2    /* fpb.zero = (fpb.hi == 0) */
        cmpwi   cr2,r8,SEXPMAX          /* if (fpa.exp == SEXPMAX) */
        cmpwi   cr0,r8,0                /* if (fpa.exp == 0) */
        crand   cr6_zero,cr6_zero,cr0_2 /* fpa.zero=(fpa.exp==0 && fpa==0) */
        crandc  cr0_2,cr0_2,cr6_zero    /* if (fpa.exp==0 && fpa!=0) */
        cmpwi   cr3,r11,SEXPMAX         /* if (fpb.exp == SEXPMAX) */
        beq     denormal_a              /* if (fpa is not denormalized) { */
        oris    r9,r9,0x0080            /*    fpa.hi |= 0x00800000; implicit 1 */
        b       adone                   /* } else { */
denormal_a:
        addi    r8,r8,1                 /*    fpa.exp++; (0 -> 1, no implicit 1) */
adone:                                  /* } */
/* cr7_zero still means (fpb.hi == 0) here; it is narrowed to a true zero test  */
/* further down.  The fpb.exp == SEXPMAX result lives in cr3 (cmpwi above);     */
/* cr1 has not been written at this point, so cr3_2 is the bit to combine.      */
        crand   cr7_inf,cr7_zero,cr3_2  /* fpb.inf=(fpb.exp==SEXPMAX && fpb==0) */
        crandc  cr7_NaN,cr3_2,cr7_zero  /* fpb.NaN=(fpb.exp==SEXPMAX && fpb!=0) */
        cmpwi   cr0,r11,0               /* if (fpb.exp == 0) */
        crand   cr7_zero,cr7_zero,cr0_2 /* fpb.zero=(fpb.exp==0 && fpb==0) */
        crandc  cr0_2,cr0_2,cr7_zero    /* if (fpb.exp==0 && fpb!=0) */
        beq     denormal_b              /* if (fpb is not denormalized) { */
        oris    r12,r12,0x0080          /*    fpb.hi |= 0x00800000; implicit 1 */
        b       bdone                   /* } else { */
denormal_b:
        addi    r11,r11,1               /*   fpb.exp++; (0 -> 1, no implicit 1) */
bdone:                                  /* } */

/* check for Not-A-Number or INFinity                                           */
        bt      cr2_2,a_NaNorINF        /* if (fpa.NaN||fpa.INF) goto a_NaNorINF; */
        bt      cr3_2,b_NaNorINF        /* if (fpb.NaN||fpb.INF) goto b_NaNorINF; */

/* check for a or b zero - if so, done                                          */
        bt      cr6_zero,a_zero         /* if (fpa.zero) return fpb         */
        bt      cr7_zero,b_zero         /* if (fpb.zero) return fpa         */

/* Apply each operand's sign to its significand (two's complement), so the      */
/* signed add further down handles both like-signed and unlike-signed inputs.   */
/* if (fpa < 0) fpa = -fpa;                                                     */
        bf+     cr6_sign,a_positive     /* if (fpa < 0) { */
        subfic  r9,r9,0                 /*   fpa.hi = 0 - fpa.hi; */
a_positive:                             /* } */
/* if (fpb < 0) fpb = -fpb;                                                     */
        bf+     cr7_sign,b_positive     /* if (fpb < 0) { */
        subfic  r12,r12,0               /*   fpb.hi = 0 - fpb.hi; */
b_positive:                             /* } */
/* Align the operands.  r11 = fpb.exp and r8 = fpa.exp on entry.  When fpb.exp  */
/* is the larger one control goes to shifta; otherwise fpb is shifted right     */
/* here, r8 is left untouched and r11 is reused as the shift count.             */
/* r10 (fpa.lo) and r4 (fpb.lo) collect the bits shifted out of the hi words.   */
        cmpw    cr0,r11,r8              /* if (fpa.exp < fpb.exp ) */
        li      r10,0                   /* fpa.lo = 0 */
        li      r4,0                    /* fpb.lo = 0 */
        bgt     shifta                  /* { shift fpa instead: see shifta } else { */
/* fpb.significand >>= (fpa.exp - fpb.exp);                                     */
        subf.   r11,r11,r8              /* shift = fpa.exp - fpb.exp; sets cr0 */
        cmpwi   cr1,r11,27              /* if beyond precision */
        beq     do_add                  /* if (shift == 0) goto do_add; (cr0 from subf.) */
        ble+    cr1,shiftless27         /* if (shift > 27) { */
                                        /*   fpb is replaced by a signed 64-bit */
                                        /*   +1 or -1 in hi:lo (rounding bit) */
        bt      cr7_sign,add_minus1     /*   if (b > 0) { add in 1 */
        li      r12,0                   /*     fpb.hi = 0; */
        li      r4,1                    /*     fpb.lo = 1; */
        b       do_add                  /*   } */
add_minus1:                             /*   else { b < 0 : add in -1 */
        li      r12,-1                  /*     fpb.hi = -1; */
        li      r4,-1                   /*     fpb.lo = -1; */
        b       do_add                  /*   } */
                                        /* } */
shiftless27:                            /* else (1 <= shift <= 27) */
                                        /* { */
        subfic  r6,r11,32               /*   r6 = 32-shift; */
        slw     r4,r12,r6               /*   fpb.lo = bits shifted out of fpb.hi */
        sraw    r12,r12,r11             /*   (signed)fpb.hi >>= shift; */
        b       do_add                  /* } */

shifta:                                 /* else { */
/* fpa.significand >>= (fpb.exp - fpa.exp);                                     */
/* Reached only when fpb.exp > fpa.exp, so the shift count is at least 1 and    */
/* no shift == 0 test is needed.  r8 is set to fpb.exp, the larger exponent.    */
        mr      r6,r11                  /* save fpb.exp before r11 becomes the shift */
        subf.   r11,r8,r11              /* shift = fpb.exp - fpa.exp; */
        mr      r8,r6                   /* r8 = fpb.exp */
        cmpwi   cr1,r11,27              /* if beyond precision */
        ble+    cr1,Ashiftless27        /* if (shift > 27) { */
                                        /*   fpa is replaced by a signed 64-bit */
                                        /*   +1 or -1 in hi:lo (rounding bit) */
        bt      cr6_sign,Aadd_minus1    /*   if (a > 0) { add in 1 */
        li      r9,0                    /*     fpa.hi = 0; */
        li      r10,1                   /*     fpa.lo = 1; */
        b       do_add                  /*   } */
Aadd_minus1:                            /*   else { a < 0 : add in -1 */
        li      r9,-1                   /*     fpa.hi = -1; */
        li      r10,-1                  /*     fpa.lo = -1; */
        b       do_add                  /*   } */
                                        /* } */
Ashiftless27:                           /* else (1 <= shift <= 27) */
                                        /* { */
        subfic  r6,r11,32               /*   r6 = 32-shift; */
        slw     r10,r9,r6               /*   fpa.lo = bits shifted out of fpa.hi */
        sraw    r9,r9,r11               /*   (signed)fpa.hi >>= shift; */
do_add:                                 /* } */
/* fpa.significand += fpb.significand;                                          */
/* 64-bit signed add of fpb (r12:r4) into fpa (r9:r10); the result's sign is    */
/* recorded in cr6_sign and the sum is then converted back to a magnitude.      */
        addc    r10,r10,r4              /* fpa.lo += fpb.lo; carry out in CA */
        adde.   r9,r9,r12               /* fpa.hi += fpb.hi + CA; sets cr0 */
        cror    cr6_sign,cr0_0,cr0_0    /* fpa.sign = (fpa < 0); */
/* if (fpa < 0) fpa = -fpa;                                                     */
        bge     pos_result              /* if (fpa < 0) { */
        subfic  r10,r10,0               /*     fpa.lo = 0 - fpa.lo; sets CA */
        subfze  r9,r9                   /*     fpa.hi = ~fpa.hi + CA; completes -fpa */
pos_result:                             /* } */
/* normalize fpa                                                                */
/* If the sum has fewer than 8 leading zeros in fpa.hi, shift hi:lo right by    */
/* (8-s) and raise the exponent by the same amount.                             */
        cntlzw  r5,r9                   /* s = cntlz(fpa.hi); */
        cmpwi   cr0,r5,8                /* if (s < 8 ) */
                                        /*   Note that first 8  bits of word */
                                        /*   were 0 (where exponent and sign */
                                        /*   were), so s can really only have */
                                        /*   a min value of 7 . */
        bge     noshrght                /* { */
        subfic  r5,r5,8                 /*   r5 = 8-s; */
        subfic  r11,r5,32               /*   r11 = 32-(8-s) = 24+s; */
        srw     r10,r10,r5              /*   fpa.lo >>= (8-s); */
        slw     r6,r9,r11               /*   temp = fpa.hi << (24+s); */
        srw     r9,r9,r5                /*   fpa.hi >>= (8-s); */
        add     r8,r5,r8                /*   fpa.exp += (8-s); */
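/* End of page 1 of 2 of the original web listing (pages 1, 2; next page).      */
/* Web viewer keyboard shortcuts: copy code Ctrl + C; search code Ctrl + F;     */
/* full screen F11; toggle theme Ctrl + Shift + D; show shortcuts ?;            */
/* increase font size Ctrl + =; decrease font size Ctrl + -                     */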