📄 ppc_fdivs.s

📁 powerpc 405 优化过的硬浮点库
💻 S
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* fpopt/ppc_fdivs.S, pl_FPE_common, pl_linux 11/24/03 16:17:33                                                                  */
/*----------------------------------------------------------------------------- */
/*  Copyright (c) 2003, IBM Corporation                                         */
/*  All rights reserved.                                                        */
/*                                                                              */
/*  Redistribution and use in source and binary forms, with or                  */
/*  without modification, are permitted provided that the following             */
/*  conditions are met:                                                         */
/*                                                                              */
/*    * Redistributions of source code must retain the above                    */
/*      copyright notice, this list of conditions and the following             */
/*      disclaimer.                                                             */
/*    * Redistributions in binary form must reproduce the above                 */
/*      copyright notice, this list of conditions and the following             */
/*      disclaimer in the documentation and/or other materials                  */
/*      provided with the distribution.                                         */
/*    * Neither the name of IBM nor the names of its contributors               */
/*      may be used to endorse or promote products derived from this            */
/*      software without specific prior written permission.                     */
/*                                                                              */
/*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND                      */
/*  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,                 */
/*  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF                    */
/*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE                    */
/*  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS           */
/*  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,         */
/*  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,                    */
/*  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR          */
/*  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY         */
/*  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT                */
/*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE           */
/*  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.    */
/*                                                                              */
/*----------------------------------------------------------------------------- */
/*                                                                              */
/* Function: divide two single floating point values.  frt = fpa / fpb          */
/* Input:    r3(fpa), r4(fpb)                                                   */
/* Output:   r3(frt)                                                            */
/* Notes:   1. No stack frame is created for the normal case of this function,  */
/*             so the following registers must be preserved, as required by     */
/*             ABI specification:                                               */
/*               LR, CR0, R1, R2, R13-R31                                       */
/*          2. For the full function case, a stack frame is created, following  */
/*             ABI specification, and non-volatile registers are saved and      */
/*             restored on the stack.                                           */
/*          3. operation performed according to IEEE754-1985 standard with      */
/*             rounding mode = nearest even.                                    */
/*                                                                              */
/*----------------------------------------------------------------------------- */
                                               
#include <ppc4xx.inc>
#include "fpeLib.inc"

#define first_loop 4 

function_prolog(__divsf3)     

     mfcr       r0                        /* save cr */
     
/*    load fpa into r8,r9,r10 and cr6.  load fpb into r11, r12, r13 and cr7 */
     mr         r9,r3                     /* load fpa.exp,     fpa.S, fpa.hi */
     mr         r12,r4                    /* load fpb.exp,     fpb.S, fpb.hi */
     rlwinm     r8,r9,9,0xff              /* isolate exponent of fpa into r8 */
     rlwinm     r11,r12,9,0xff            /* isolate exponent of fpb into r11 */
     cmpwi      cr6,r9,0                  /* set fpa.sign */
     cmpwi      cr7,r12,0                 /* set fpb.sign */
/* test for normal:                                                              */
     cmpwi      cr0,r8,0xff              /* if (fpa.exp == SEXPMAX) */
     cmpwi      cr1,r8,0                  /* if (fpa.exp == 0) */
     cmpwi      cr2,r11,0xff             /* if (fpb.exp == SEXPMAX) */
     cmpwi      cr3,r11,0                 /* if (fpb.exp == 0) */
     beq        full
     beq        cr1,full 
     beq        cr2,full
     beq        cr3,full 

/* normal case only:                                                            */
     oris       r9,r9,0x0080              /*    fpa.hi |= 0x00800000; */
     oris       r12,r12,0x0080            /*    fpb.hi |= 0x00800000; */
     
/*   Calculate exponent                                                         */
     subf       r8,r11,r8                 /* result.exp = fpa.exp - fpb.exp; */
                                          /* ------- note:     r11 is now free */
     addic.     r8,r8,+127                /* result.exp += SEXPBIAS; check for negative */
                                          /* result.exp += 1 (for dividing left-just */
                                          /* numbers if result is left-just) */        
     cmpwi      cr2,r8,0xff               /* if (result.exp) >=  SEXPMAX */
     ble        full                      /*    or less than minimum */
     bge        cr2,full 
     
/*   Zero out the non-number part of the     operands:                          */
/*   for the sign bit plus one working bit:                                 */
     rlwinm     r9,r9,0,8,31              /* r9 = fpa */
     rlwinm     r12,r12,0,8,31            /* r12 = fpb */

/*   Divide the numbers (25 bits of long division);                     */
/*   r5 holds the intermediate differences.                                     */
     li         r5,25                     /* calculate 24 signif + 1 sticky */
     mtctr      r5
     
longdivloop:
/*   r5 = r9 - r12:                                                             */
     subfc.     r5,r12,r9
     adde       r11,r11,r11               /* rot result left and put CA into LSb of result */
     blt+       zerobit
                                          /* if intermed diff positive then: */
     adde       r9,r5,r5                  /* r9 gets r5 << 1 */
     bdnz       longdivloop
     b          done25

zerobit:                                  /* else: */
     adde       r9,r9,r9                  /* r9 gets r9 << 1 */
     bdnz       longdivloop
done25:

/*   Since we divided one left-justified number by another left-justified   */
/*   number, there are two possibilities for the result:                    */
/*   either (the msb is 1) or                                               */
/*   (the msb is 0 and the next bit is 1). If the msb is 0, then we need to */
/*   calculate one more bit (to make 25 significant bits).                  */
/*   NOTE: the msb is now in bit 7 (=32-25) of result=r11, because we have  */
/*   calculated 25 bits.                                                    */
     rlwinm.    r5,r11,0,7,7              /* test hi-bit = bit 7 of result */
     bne+       highbit_is_1
     addic.     r8,r8,-1                  /* shifted case: exponent is 1 less than */
                                          /* non-shifted. */
     beq        full                      /* exponent=0 is special case */
     addi       r5,0,1
     mtctr      r5
     b          longdivloop
highbit_is_1:

/*   We now have the first 25 bits of the result. Bits beyond 24 are only */
/*   needed to determine rounding. If bit 25 (the last one just calculated) */
/*   is 0, then we don't round. If it is 1, then we'll calculate up to 12 more */
/*   bits, looking for a 1. The first time we see a 1, we know to round up. */
/*   But even if all 12 are 0, we still don't know for sure to not round, */
/*   so we punt to the full routine.                                            */
     rlwinm     r5,r11,0,30,31
     cmpwi      r5,1
     bne        roundup
     
/*   At this point the LSb is 0 and the sticky bit is 1; we need more bits */
/*   to determine rounding -- if any subsequent bit is 1, then we round up. */
/*   We'll try 12 more bits before giving up:                               */
     addi       r5,0,12
     mtctr      r5
seek1loop:
/*   r5 = r9 - r12:                                                             */
     subfc.     r5,r12,r9
     bge        roundup
                                          /* (result(this bit) = 0) */
     adde       r9,r9,r9                  /* r9 gets r9 << 1 */
     bdnz       seek1loop
     b          full

roundup:                                  /* adding 1 to the sticky bit performs standard */
     addi       r11,r11,1                 /* rounding; i.e. if sticky=0 then other bits */

/*   We're done! Just assemble the result back into IEEE format:        */
/*   We need to get the msb from bit 7 to bit 8 of the result, so shift 1 */
     srwi       r3,r11,1
     
/*   Put the exponent into bits 1-8 of the MSW:                            */
     rlwimi     r3,r8,23,1,8  
     
/*   Calculate sign bit:                                                        */
     crxor      cr6_sign,cr6_sign,cr7_sign
     bf         cr6_sign,signbit0
     oris       r3,r3,0x8000              /* set sign bit */
     
signbit0:
     mtcr       r0                        /* restore cr */   
     blr                                  /* return */

/*---------------- end of normal-only case ---------------                      */
full:
     mtcr       r0                        /* restore CR */   
     
     mflr       r0                        /* save link register in caller's stack */
     stw        r0,4(r1)               
     stwu       r1,-STACKSIZE(r1)         /* set up stack frame to save regs */
     
     SAVEREG(19)                          /* save r19 */
     mfcr       r19                       /* save cr */
     
/*   load fpa into r8,r9,r10 and cr6.  load fpb into r11, r12, r4 and cr7     */
     mr         r9,r3                     /* load fpa.exp, fpa.S, fpa.hi */
     mr         r12,r4                    /* load fpb.exp, fpb.S, fpb.hi */
     rlwinm     r8,r9,9,0xff              /* isolate exponent of fpa */
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -