📄 ppc_fmuls.s

📁 powerpc 405 优化过的硬浮点库
💻 S
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* fpopt/ppc_fmuls.S, pl_FPE_common, pl_linux 11/24/03 16:17:35                                                                  */
/*----------------------------------------------------------------------------- */
/*  Copyright (c) 2003, IBM Corporation                                         */
/*  All rights reserved.                                                        */
/*                                                                              */
/*  Redistribution and use in source and binary forms, with or                  */
/*  without modification, are permitted provided that the following             */
/*  conditions are met:                                                         */
/*                                                                              */
/*    * Redistributions of source code must retain the above                    */
/*      copyright notice, this list of conditions and the following             */
/*      disclaimer.                                                             */
/*    * Redistributions in binary form must reproduce the above                 */
/*      copyright notice, this list of conditions and the following             */
/*      disclaimer in the documentation and/or other materials                  */
/*      provided with the distribution.                                         */
/*    * Neither the name of IBM nor the names of its contributors               */
/*      may be used to endorse or promote products derived from this            */
/*      software without specific prior written permission.                     */
/*                                                                              */
/*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND                      */
/*  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,                 */
/*  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF                    */
/*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE                    */
/*  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS           */
/*  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,         */
/*  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,                    */
/*  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR          */
/*  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY         */
/*  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT                */
/*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE           */
/*  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.    */
/*                                                                              */
/*----------------------------------------------------------------------------- */
/*                                                                              */
/* Function: multiply two single floating point values (frt = fpa * fpb)        */
/* Input:    r3(fpa), r4(fpb)                                                   */
/* Output:   r3(frt)                                                            */
/* Notes:   1. No stack frame is created for this function, so the following    */
/*             registers must be preserved, as required by ABI specification:   */
/*               LR, CR0, R1, R2, R13-R31                                       */
/*          2. operation performed according to IEEE754-1985 standard with      */
/*             rounding mode = nearest even.                                    */
/*                                                                              */
/*----------------------------------------------------------------------------- */

#include <ppc4xx.inc>
#include "fpeLib.inc"

function_prolog(__mulsf3)

     mfcr     r0                         /* save cr */
     
/* isolate exponents        */
     rlwinm   r8,r3,9,0xff               /* isolate exponent of fpa into r8 */
     rlwinm   r11,r4,9,0xff              /* isolate exponent of fpc into r11 */
     
/* test for normal:                                                             */
     cmpwi    cr0,r8,0xff                /* if (fpa.exp == SEXPMAX) */
     cmpwi    cr1,r8,0                   /* if (fpa.exp == 0) */
     cmpwi    cr2,r11,0xff               /* if (fpb.exp == SEXPMAX) */
     cmpwi    cr3,r11,0                  /* if (fpb.exp == 0) */
     beq      full                       /* fpa.exp is max */
     beq      cr2,full                   /* fpb.exp is max */
     beq      cr1,tst0a                  /* fpa.exp is 0  */
     beq      cr3,tst0b                  /* fpb.exp is 0  */

/* normal case only: add in implied one                           */
     oris     r9,r3,0x0080               /* fpa.hi |= 0x00800000; */
     oris     r12,r4,0x0080              /* fpb.hi |= 0x00800000; */
     
/*   Calculate exponent                                                         */
     add      r8,r8,r11                  /* result.expt = fpa.exp + fpb.exp; */
                                         /* ------- note:     r11 is now free */
     addic.   r8,r8,-126                 /* result.exp -= SEXPBIAS */
                                         /* result.exp += 1 (for multiplying left-just */
                                         /* numbers if result is left-just) */
                                         
     cmpwi    cr2,r8,253                 /* if (result.exp >= (SEXPMAX--2) :  */
     ble      full
     bge      cr2,full                
     
/*   Calculate sign:                                                            */
     xor      r11,r3,r4                  /* get product sign */
     
/*   Multiply the numbers:                                                      */
/*   Start by shifting everything left 8 bits, so that the MSW is full:        */
     rlwinm   r5,r9,8,0,23               /* r5 = fpa */
                                         
     rlwinm   r6,r12,8,0,23              /* r6 = fpb */
                                         /*   Multiply a and b (upper half only):                                        */
     mulhwu.  r5,r5,r6                   /* r5 gets high word of result */
                                         /* ------- note:     r6 is now free */
/*   There are two possibilities for the result:                        */
/*   Either (bit 0 is 1) or (bit 0 is 0 and bit 1 is 1).            */
/*   If bit 0 is 0, then    shift left 1 bit to normalize:                 */
     blt      msb_is_bit0
     add      r5,r5,r5                   /* shift fraction (r5) left 1 to fill msb*/
     addic.   r8,r8,-1                   /* shifted case: exponent is 1 less */
                                         /* than non-shifted */
     beq      full                       /* if exponent out of range, needs full handling */
msb_is_bit0:

/*   See if we need to round. (NOTE: the exact .5 case needs more precision */
/*   than what we have calculated; in that case,  go to the full handler.)   */
/*   24 bits (bits 0-23 of the result) are significant. The sticky bit          */
/*   is bit 24. If it is 0, then no rounding.                           */
/*   BUT we don't really know the sticky bit for sure yet -- the uncalculated */
/*   low-order 16 bits of the 48-bit product could conceivably cause the     low */
/*   bits of our result to round up, changing the state of bit 24.      */
/*   The round-up is the result of a carry-out from the low 12 bits,    */
/*   so at most it adds 1. But we have already                                  */
/*   shifted left one bit, just above, so by now the carry-out could add    */
/*   as much as 2. The addition of a 2-bit number can                           */
/*   propagate up to bit 24 ONLY if bits 25-30 are all ones:        */
     rlwinm.  r10,r5,0,25,31
     cmpwi    cr2,r10,0x7e               /* special case if bits 25-30 all 1's */
     rlwinm   r10,r5,0,23,24             /* test LSb and sticky bit */
     bge      cr2,full
     cmpwi    cr3,r10,0x80               /* special rounding if LSb=0 and sticky=1 */
     addic    r5,r5,0x80                 /* standard rounding = add 1 to sticky */
     bne+     cr3,std_done
     beq      full                       /* if LSb=0 and sticky=1 and 22-31 all */
                                         /* zeros, then special case */
std_done: 
     mcrxr    cr0                        /* if carry out, increment exponent */
     bf       cr0_2,nocarry
     addi     r8,r8,1
nocarry:     
          
/*   We're done!   Just assemble the result back into IEEE format:        */
/*   r3 = IEEE result                                                     */
/*   Shift right 8 bits to put the MSb into bit 8, but */
/*   mask it off since it's implied:                                        */
     rlwinm   r3,r5,32-8,9,31
     
/*   Put     the exponent into bits 1-8 of the MSW:                            */
     rlwimi   r3,r8,23,1,8
     
/*   Insert sign     bit:                                                       */
     rlwimi   r3,r11,0,0,0

     mtcr     r0                         /* restore cr */
     blr                                 /* return; */

tst0a:
     rlwinm.  r11,r3,0,1,31              /* FRA = +-0 ? */
     beq      return0
     beq      cr3,tst0b                  /* fpb.exp == 0 ? (held in cr3)*/
     b        full
/*   a is 0:                                                                    */
/*   Calculate sign:                                                            */
return0:
     xor      r3,r3,r4                   /* get product sign */
     rlwinm   r3,r3,0,0,0                /* *FRT = fpa.hi = 0+sign */
     mtcr     r0                         /* restore cr */
     blr                                 /* return */

tst0b:
     rlwinm.  r11,r4,0,1,31              /* FRC = +-0 ? */
     bne      full
/*   b is 0:                                                                    */
     b        return0

/*---------------- end of normal-only case ---------------                      */
full:
.fmsful:
     mtcr     r0                        /* restore CR */      
     mtctr    r0                        /* save cr in ctr */   
     
/* load fpa into r8,r9 and cr6.  load fpb into r11, r12 and cr7     */
     mr       r9,r3                     /* load fpa.exp, fpa.S, fpa.hi */
     mr       r12,r4                    /* load fpb.exp, fpb.S, fpb.hi */
     rlwinm   r8,r9,9,0xff              /* isolate exponent of fpa */
     rlwinm   r11,r12,9,0xff            /* isolate exponent of fpb */
     cmpwi    cr6,r9,0                  /* set fpa.sign */
     cmpwi    cr7,r12,0                 /* set fpb.sign */
     rlwinm.  r9,r9,0,0x007fffff        /* isolate fpa.hi */
     cror     cr6_zero,cr0_2,cr0_2      /* fpa.zero = fpa.hi == 0 */
     rlwinm.  r12,r12,0,0x007fffff      /* isolate fpb.hi */
     cror     cr7_zero,cr0_2,cr0_2      /* fpb.zero = fpa.hi == 0 */
     cmpwi    cr0,r8,0xff               /* if (fpa.exp == SEXPMAX) */
     crand    cr6_inf,cr6_zero,cr0_2    /* fpa.inf=(fpa.exp==SEXPMAX && fpa==0) */
     crandc   cr6_NaN,cr0_2,cr6_zero    /* fpa.NaN=(fpa.exp==SEXPMAX && fpa!=0) */
     cmpwi    cr0,r8,0                  /* if (fpa.exp == 0) */
     crand    cr6_zero,cr6_zero,cr0_2   /* fpa.zero=(fpa.exp==0 && fpa==0) */
     crandc   cr0_2,cr0_2,cr6_zero      /* if (fpa.exp==0 && fpa!=0) */
     cmpwi    cr1,r11,0xff              /* if (fpb.exp == SEXPMAX) */
     beq      denormal_a                /* {       Add implied 1 to significand */
     oris     r9,r9,0x0080              /*    fpa.hi |= 0x00800000; */
     b        adone                     /* } else { */
denormal_a:
     addi     r8,r8,1                   /*    fpa.exp++; */
adone:                                  /* } */
     crand    cr7_inf,cr7_zero,cr1_2    /* fpb.inf=(fpb.exp==SEXPMAX && fpb==0) */
     crandc   cr7_NaN,cr1_2,cr7_zero    /* fpb.NaN=(fpb.exp==SEXPMAX && fpb!=0) */
     cmpwi    cr0,r11,0                 /* if (fpb.exp == 0) */
     crand    cr7_zero,cr7_zero,cr0_2   /* fpb.zero=(fpb.exp==0 && fpb==0) */
     crandc   cr0_2,cr0_2,cr7_zero      /* if (fpb.exp==0 && fpb!=0) */
     beq      denormal_b                /* {      Add implied 1 to significand */
     oris     r12,r12,0x0080            /*    fpb.hi |= 0x00800000; */
     b        bdone                     /* } else { */
denormal_b:
     addi     r11,r11,1                 /*   fpb.exp++; */
bdone:                                  /* } */
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -