📄 ppc_fdiv.s

📁 powerpc 405 优化过的硬浮点库
💻 S
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* fpopt/ppc_fdiv.S, pl_FPE_common, pl_linux 11/24/03 16:17:31                                                                  */
/*----------------------------------------------------------------------------- */
/*  Copyright (c) 2003, IBM Corporation                                         */
/*  All rights reserved.                                                        */
/*                                                                              */
/*  Redistribution and use in source and binary forms, with or                  */
/*  without modification, are permitted provided that the following             */
/*  conditions are met:                                                         */
/*                                                                              */
/*    * Redistributions of source code must retain the above                    */
/*      copyright notice, this list of conditions and the following             */
/*      disclaimer.                                                             */
/*    * Redistributions in binary form must reproduce the above                 */
/*      copyright notice, this list of conditions and the following             */
/*      disclaimer in the documentation and/or other materials                  */
/*      provided with the distribution.                                         */
/*    * Neither the name of IBM nor the names of its contributors               */
/*      may be used to endorse or promote products derived from this            */
/*      software without specific prior written permission.                     */
/*                                                                              */
/*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND                      */
/*  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,                 */
/*  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF                    */
/*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE                    */
/*  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS           */
/*  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,         */
/*  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,                    */
/*  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR          */
/*  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY         */
/*  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT                */
/*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE           */
/*  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.    */
/*                                                                              */
/*----------------------------------------------------------------------------- */
/*                                                                              */
/* Function: divide two double floating point values.  frt = fpa / fpb          */
/* Input:    r3,r4(fpa)                                                         */
/*           r5,r6(fpb)                                                         */
/* Output:   r3,r4(frt)                                                         */
/* Notes:   1. A stack frame is created, following ABI specification, and       */
/*             non-volatile registers are saved and restored on the stack.      */
/*          2. operation performed according to IEEE754-1985 standard with      */
/*             rounding mode = nearest even.                                    */
/*                                                                              */
/*----------------------------------------------------------------------------- */

#include <ppc4xx.inc>
#include "fpeLib.inc"

#define first_loop 4 

function_prolog(__divdf3)

        mflr    r0                      /* save link register in caller's stack */
        stw     r0,4(r1)               
        stwu    r1,-STACKSIZE(r1)       /* set up stack frame to hold saved regs */

        SAVEREG(30)
        SAVEREG(31)
        mfcr    r0                      /* save cr */
/* load fpa into r8,r9,r10 and cr6.  load fpb into r11, r12, r13 and cr7        */
        mr      r9,r3                   /* load fpa.exp, fpa.S, fpa.hi */
        mr      r12,r5                  /* load fpb.exp, fpb.S, fpb.hi */
        rlwinm  r8,r9,32-20,0x7ff       /* isolate exponent of fpa into r8 */
        rlwinm  r11,r12,32-20,0x7ff     /* isolate exponent of fpb into r11 */
        mr      r10,r4                  /* load fpa.lo */
        cmpwi   cr6,r9,0                /* set fpa.sign */
                                        /* load fpb.lo (already in r6)*/
        cmpwi   cr7,r12,0               /* set fpb.sign */
/* test for normal:                                                             */
        cmpwi   cr0,r8,0x7ff            /* if (fpa.exp == DEXPMAX) */
        cmpwi   cr1,r8,0                /* if (fpa.exp == 0) */
        cmpwi   cr2,r11,0x7ff           /* if (fpb.exp == DEXPMAX) */
        cmpwi   cr3,r11,0               /* if (fpb.exp == 0) */
        beq     full
        beq     cr1,full                
        beq     cr2,full                
        beq     cr3,full                

/* normal case only:                                                            */
        oris    r9,r9,0x0010            /*    fpa.hi |= 0x00100000; */
        oris    r12,r12,0x0010          /*    fpb.hi |= 0x00100000; */
/*   Calculate exponent                                                         */
        subf    r8,r11,r8               /* result.exp = fpa.exp - fpb.exp; */
                                        /* ------- note: r11 is now free */
        addic.  r8,r8,+1023             /* result.exp += DEXPBIAS; check for negative */
                                        /* result.exp += 1 (for dividing left-just */
                                        /* numbers if result is left-just) */
        cmpwi   cr2,r8,0x7ff            /* if (result.exp >= DEXPMAX) */
        ble     full
        bge     cr2, full               /* cr2 */
/*   Zero out the non-number part of the operands:                              */
/*   for the sign bit plus one working bit:                                     */
        rlwinm  r9,r9,0,11,31           /* r9 = fpa.hi */
                                        /* r10 = fpa.lo */

        rlwinm  r12,r12,0,11,31         /* r12 = fpb.hi */
                                        /* r6 = fpb.lo */

/*   Divide the numbers (54 bits of long division);                             */
/*   r7/r11 hold the intermediate differences.                                  */
        li      r7,54                   /* calculate 53 signif + 1 sticky */
        mtctr   r7
longdivloop:
/*   r7/r11 = r9/r10 - r12/r6:                                                  */
        subfc   r11,r6,r10
        subfe.  r7,r12,r9
        adde    r31,r31,r31             /* rot result left and put CA into LSb of result */
        adde    r30,r30,r30             /* rotate r30/r31 left 1 bit */
        blt+    zerobit
                                        /* if intermed diff positive then: */
        addc    r10,r11,r11             /* r9/r10 get r7/r11 << 1 */
        adde    r9,r7,r7                /* */
        bdnz    longdivloop
        b       done54

zerobit:                                /* else: */
        addc    r10,r10,r10             /* r9/r10 get r9/r10 << 1 */
        adde    r9,r9,r9                /* */
        bdnz    longdivloop
done54:

/*   Since we divided one left-justified number by another left-justified       */
/*   number, there are two possibilities for the result:                        */
/*   either (the msb is 1) or                                                   */
/*   (the msb is 0 and the next bit is 1). If the msb is 0, then we need to     */
/*   calculate one more bit (to make 54 significant bits).                      */
/*   NOTE: the msb is now in bit 10 (=64-54) of result.hi=r30, because we have  */
/*   calculated 54 bits.                                                        */
        rlwinm. r7,r30,0,10,10          /* test hi-bit = bit 10 of result.hi */
        bne+    highbit_is_1
        addic.  r8,r8,-1                /* shifted case: exponent is 1 less than */
                                        /* non-shifted. */
        beq     full                    /* exponent=0 is special case */
        addi    r7,0,1
        mtctr   r7
        b       longdivloop
highbit_is_1:

/*   We now have the first 54 bits of the result. Bits beyond 53 are only       */
/*   needed to determine rounding. If bit 54 (the last one just calculated)     */
/*   is 0, then we don't round. If it is 1, then we'll calculate up to 12 more  */
/*   bits, looking for a 1. The first time we see a 1, we know to round up.     */
/*   But even if all 12 are 0, we still don't know for sure to not round,       */
/*   so we punt to the full routine.                                            */
        rlwinm  r7,r31,0,30,31
        cmpwi   r7,1
        bne     roundup
/*   At this point the LSb is 0 and the sticky bit is 1; we need more bits      */
/*   to determine rounding -- if any subsequent bit is 1, then we round up.     */
/*   We'll try 12 more bits before giving up:                                   */
        addi    r7,0,12
        mtctr   r7
seek1loop:
/*   r7/r11 = r9/r10 - r12/r6:                                                  */
        subfc   r11,r6,r10
        subfe.  r7,r12,r9
        bge     roundup
                                        /* (result(this bit) = 0) */
        addc    r10,r10,r10             /* r9/r10 get r9/r10 << 1 */
        adde    r9,r9,r9                /* */
        bdnz    seek1loop
        b       full
roundup:                                /* adding 1 to the sticky bit performs standard */
        addic   r31,r31,1               /* rounding; i.e. if sticky=0 then other bits */
        addze   r30,r30                 /* are not affected */

/*   We're done! Just assemble the result back into IEEE format:                */
/*   r30 = IEEE MSW,   r31 = IEEE LSW                                           */
/*   We need to get the msb from bit 10 to bit 11 of result.hi; so shift        */
/*   right 1 bit:                                                               */
        rlwinm  r31,r31,31,1,31         /* result >> 1 */
        rlwimi  r31,r30,31,0,0          /* */
        rlwinm  r30,r30,31,12,31        /* */
/*   Put the exponent into bits 1-11 of the MSW:                                */
        rlwimi  r30,r8,20,1,11
/*   Calculate sign bit:                                                        */
        crxor   cr6_sign,cr6_sign,cr7_sign
        bf      cr6_sign,signbit0
        oris    r30,r30,0x8000          /* set sign bit */
signbit0:
        mr      r3,r30                  /* *FRT = fpa.hi; */
        mr      r4,r31                  /* *FRT = fpa.lo; */
        RESTREG(30)
        RESTREG(31)
        mtcr    r0                      /* restore cr */
        
return_common:                     
        lwz     r1,0(r1)                /* previous stack frame */
        lwz     r0,4(r1)                /* saved link register */
        mtlr    r0                      /* restore link register */
        blr                             /* return */

/*---------------- end of normal-only case ---------------                      */
full:
        RESTREG(30)
        RESTREG(31)
        mtcr    r0                      /* restore CR */


        SAVEREG(16)                     /* save r16 */
        mfcr    r16                     /* save cr */
/* load fpa into r8,r9,r10 and cr6.  load fpb into r11, r12, r6 and cr7         */
        mr      r9,r3                   /* load fpa.exp, fpa.S, fpa.hi */
        mr      r12,r5                  /* load fpb.exp, fpb.S, fpb.hi */
        rlwinm  r8,r9,32-20,0x7ff       /* isolate exponent of fpa */
        rlwinm  r11,r12,32-20,0x7ff     /* isolate exponent of fpb */
        mr      r10,r4                  /* load fpa.lo */
        cmpwi   cr6,r9,0                /* set fpa.sign */
                                        /* load fpb.lo (already in r6)*/
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -