⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ppc_fmuls.s

📁 powerpc 405 优化过的硬浮点库
💻 S
📖 第 1 页 / 共 2 页
字号:

/* ---------------------------------------------------------------------------- */
/* Special-operand dispatch, result exponent/sign, and the raw 32x32 multiply.   */
/*                                                                              */
/* NOTE(review): the entry code that unpacks the operands and sets the          */
/* cr6_xxx / cr7_xxx condition bits is not part of this excerpt.  Going by the  */
/* comments below, r8 / r11 hold the biased exponents of a / b and r9 / r12     */
/* their significands, while r3 / r4 still hold the raw operands -- confirm     */
/* against the entry code.                                                      */
/*                                                                              */
/* The order of the tests matters: NaN operands are handled first, and zeros    */
/* are tested before infinities, so 0 * INF is seen by a_zero / b_zero (which   */
/* branch to rtn_NaN) rather than by a_INF / b_INF.                             */
/* ---------------------------------------------------------------------------- */
/* if (fpa == NaN) return QNaN;                                                 */
     bt       cr6_NaN,a_NaN             /* if (fpa == NaN) goto a_NaN; */
     
/* if (fpb == NaN) return QNaN;                                                 */
     bt       cr7_NaN,b_NaN             /* if (fpb == NaN) goto b_NaN; */
     
/* if (fpa == 0) the result is built in a_zero (QNaN when fpb is INF)           */
     bt       cr6_zero,a_zero           /* if (fpa == 0) goto a_zero; */
     
/* if (fpb == 0) the result is built in b_zero (QNaN when fpa is INF)           */
     bt       cr7_zero,b_zero           /* if (fpb == 0) goto b_zero; */
     
/* check for infinities                                                     */
     bt       cr6_inf,a_INF             /* if (fpa.INF) goto a_INF; */
     bt       cr7_inf,b_INF             /* if (fpb.INF) goto b_INF; */
     
/* Calculate exponent: both inputs carry the bias, so remove one copy of it     */
     add      r8,r8,r11                 /* fpa.exp += fpb.exp; */
     addi     r8,r8,-(127)              /* fpa.exp -= (SEXPBIAS); */
     
/* Calculate sign; the result sign stays in cr6_sign until noround tests it     */
     crxor    cr6_sign,cr6_sign,cr7_sign /* fpa.sign ^= fpb.sign; */
     
/* Put a and b significands into r11 and r12 (b already in 12)                  */
     mr       r11,r9                    /* r11 = a significand; r9 is reused for the product */
          
/* Multiply a and b then shift left 9 to left-justify                           */
/* The 64-bit product is kept as the pair r9 (high word) : r10 (low word).      */
     mulhwu   r9,r11,r12                /* r9  = high 32 bits of a * b (unsigned) */
     mullw    r10,r11,r12               /* r10 = low  32 bits of a * b */
   
     rlwinm   r9,r9,9,0xfffffe00        /* fpa.hi <<= 9; */
     rlwimi   r9,r10,9,0x1ff            /* fpa.hi |= fpa.lo >> 23;  (top 9 bits of lo) */
     rlwinm   r10,r10,9,0xfffffe00      /* fpa.lo <<= 9; */
             
/* Normalize results                                                            */
/* Goal: shift the 64-bit value r9:r10 (fpa.hi:fpa.lo) so that fpa.hi has       */
/* exactly 8 leading zero bits, adjusting the exponent in r8 by the shift.      */
/*   s <  8 : shift right by 8-s, exponent grows                                */
/*   s == 8 : already in place                                                  */
/*   s <  32: shift left by s-8, exponent shrinks                               */
/*   s == 32: fpa.hi is zero, take the bits from fpa.lo (gt32)                  */
     cntlzw   r5,r9                     /* s = cntlz(fpa.hi); */
     cmpwi    cr0,r5,8                  /* if (s     < 8) */
     bge      noshrght                  /* {   (cr0 is tested again at noshrght) */
     subfic   r5,r5,8                   /*   r5 = 8-s; */
     subfic   r11,r5,32                 /*   r11 = 32-(8-s) = 24+s; */
     srw      r10,r10,r5                /*   fpa.lo >>= (8-s); */
     slw      r6,r9,r11                 /*   temp = fpa.hi << (24+s); */
     or       r10,r10,r6                /*   fpa.lo |= temp; */
     srw      r9,r9,r5                  /*   fpa.hi >>= (8-s); */
     add      r8,r8,r5                  /*   fpa.exp += (8-s); */
     cmpwi    cr0,r8,0xff               /*   if (fpa.exp == SEXPMAX) */
     bne      noshift                   /*   { */
     addi     r9,0,0                    /*     fpa.hi = 0; */
     addi     r10,0,0                   /*     fpa.lo = 0; */
                                        /*   } */
     b        noshift                   /* } */
noshrght:                               /* else if (s > 8) */
     beq      noshift                   /* {   (still the cr0 result of "cmpwi r5,8": s == 8 needs no shift) */
     cmpwi    cr0,r5,32                 /*   if (s < 32) */
     bge      gt32                      /*   { */
     addi     r5,r5,-8                  /*     r5 = s-8; */
     subf     r8,r5,r8                  /*     fpa.exp -= (s-8); */
     slw      r9,r9,r5                  /*     fpa.hi <<= (s-8); */
     subfic   r11,r5,32                 /*     r11 = 32-(s-8) = 40-s; */
     srw      r4,r10,r11                /*     temp = fpa.lo >> (40-s);  (r4 is used as scratch) */
     or       r9,r9,r4                  /*     fpa.hi |= temp; */
     slw      r10,r10,r5                /*     fpa.lo <<= (s-8); */
     b        noshift                   /*   } */
gt32:                                   /*   else   (s == 32, i.e. fpa.hi == 0) */
                                        /*   { */
     cntlzw   r5,r10                    /*     s = cntlz(fpa.lo); */
     addi     r4,r5,32-8                /*     r4 = s+(32-8); */
     subf     r8,r4,r8                  /*     fpa.exp -= (s+(32-8)); */
     addic.   r5,r5,-8                  /*     r5 = s-8;  (record form: sets cr0 for the ble) */
     ble      sh32le                    /*     if (s > 8) { */
     slw      r9,r10,r5                 /*     fpa.hi = (fpa.lo << (s-8)); */
                                        /*     NOTE(review): fpa.lo (r10) is not cleared on this path */
     b        noshift                   /*     } else { */
sh32le:       
     subfic   r5,r5,0                   /*     r5 = 8-s; */
     srw      r9,r10,r5                 /*     fpa.hi = fpa.lo >> (8-s); */
     subfic   r11,r5,32                 /*     r11 = 32-(8-s); */
     slw      r10,r10,r11               /*     fpa.lo <<= (32-(8-s));  (a shift count of 32 yields 0) */
                                        /*     } */
                                        /*   } */
noshift:                                /* } */
/* Fix up number in normalized range                                  */
/* On entry: r8 = result exponent, r9 = fpa.hi (result significand),   */
/* r10 = fpa.lo (bits below the significand, used only for rounding).  */
     cmpwi    cr0,r8,1                  /* if (fpa.exp >= 1) */
     blt      denormexp                 /* {   (exponent below 1: denormal/underflow path) */
/* Check for single overflow                                                     */
     cmpwi    cr0,r8,255                /*   if (fpa.exp >= (SEXPMAX)) */
     blt+     roundit                   /*   {   ("+" hints that the branch is usually taken) */
     addi     r8,0,0xff                 /*     fpa.exp = SEXPMAX; */
     addi     r9,0,0                    /*     fpa.hi = 0;  (exponent 0xff with zero fraction = INF) */
     b        noround                   /*     goto noround; */
                                        /*   } */
/* Round to nearest, ties to even: round up when fpa.lo is above one half, or   */
/* exactly one half with the low significand bit set.                           */
roundit:     
     addis    r12,0,0x8000              /*   r12 = 0x80000000;  (one half in fpa.lo units) */
     cmplw    cr1,r10,r12               /*   if (round >= 0x80000000) guard set */
     blt      cr1,noround               /*   { */
     andi.    r0,r9,0x00000001         /*     if (((fpa.lobit )==0) && */
     crand    cr0_2,cr0_2,cr1_2         /*        (round == 0x80000000)) no sticky or round*/
                                        /*     (cr0_2 / cr1_2 are presumably the EQ bits of cr0 / cr1) */
     beq      noround                   /*     goto noround;  (exact tie and already even) */
     addi     r9,r9,1                   /*     fpa.hi++; */
     addis    r6,0,0x0100               /*     r6 = 0x01000000; */
     cmpw     cr0,r9,r6                 /*     if (fpa.hi == 0x01000000) carry out */
     bne      noround                   /*     { */
     addi     r8,r8,1                   /*       fpa.exp++; */
                                        /*     } */
                                        /*     } */
                                        /*   } */
     b        noround                   /* } */
/* Result exponent is below 1: shift the significand right by (1 - exp) so the  */
/* value can be stored with exponent field 0, round it, and flush to zero when  */
/* the shift is 25 or more or when the rounded significand is zero.             */
denormexp:                              /* else */
                                        /* { */
     subfic   r12,r8,1                  /*   shift= 1 - fpa.exp; */
     li       r8,0                      /*   fpa.exp = 0; */
     cmpwi    cr0,r12,25                /*   if ( shift < 25) */
     bge      rnd2zero                  /*   { */
     subfic   r11,r12,32                /*     r11 = 32-shift; */
     slw.     r0,r10,r11                /*     round = fpa.lo << (32-shift);  (sets cr0; tested by beq below) */
     srw      r5,r10,r12                /*     r5 = fpa.lo >> shift; */
     slw      r6,r9,r11                 /*     temp = fpa.hi << (32-shift); */
     or       r5,r5,r6                  /*     r5 |= temp; */
     srw      r4,r9,r12                 /*     r4 = fpa.hi >> shift; */
     beq      nostk                     /*     if (round)   (bits were shifted out of fpa.lo) */
     ori      r5,r5,0x0001              /*     r5 |= 0x00000001;  (remember them as a sticky bit) */
nostk:        
     addis    r6,0,0x8000               /*     r6 = 0x80000000; */
     cmplw    cr1,r5,r6                 /*     if (round >= 0x80000000) guard set */
     blt      cr1,chkzro                /*     { */
     andi.    r0,r4,0x00000001          /*       if ( fpa.lobit==0) && */
     crand    cr0_2,cr0_2,cr1_2         /*          (round == 0x80000000)) no sticky or round*/
     beq      chkzro                    /*        goto chkzro;  (exact tie and already even) */
     addi     r4,r4,1                   /*       fpa.hi++; */
     addis    r6,0,0x0080               /*       r6 = 0x00800000; */
     cmpw     cr0,r4,r6                 /*       if (fpa.hi == 0x00800000) carry out */
     bne      chkzro                    /*       { */
     addi     r8,r8,1                   /*         fpa.exp++; */
chkzro:                                 /*       } */
                                        /*     } */
     cmpwi    cr0,r4,0                  /*     if (fpa.hi == 0 ) */
     mr       r9,r4                     /*     fpa.hi = r4;  (mr leaves cr0 untouched) */
     beq      rnd2zero                  /*     goto rnd2zero; */
     b        noround                   /*   } */
rnd2zero:                               /*   else */
                                        /*   { */
     addi     r8,0,0                    /*     fpa.exp = 0; */
     addi     r9,0,0                    /*     fpa.hi = 0; */
                                        /*   } */
                                        /* } */
/* fpt = fpa;                                                               */
/* Pack exponent (r8), significand (r9) and sign (cr6_sign) into r3 and return. */
noround:                                /* } */
     rlwimi   r9,r8,23,0x7f800000       /* insert fpa.exp << 23 into bits 0x7f800000 of fpa.hi */
                                        /* (replaces whatever those bits held, e.g. bit 0x00800000) */
     bf       cr6_sign,nosign           /* if (fpa.sign)     { */
     oris     r9,r9,0x8000              /*   fpa.hi |= 0x80000000; */
nosign:                                 /* } */
     mr       r3,r9                     /* result is returned in r3 */
     mfctr    r0                        /* r0 = CTR; presumably the caller's CR was saved there on entry -- confirm */
     mtcr     r0                        /* restore cr */
     blr                                /* return; */

a_zero:                                 /* fpa == 0; r3 is expected to still hold fpa */
     bt       cr7_inf,rtn_NaN           /* if (fpb.inf) goto rtn_NaN;   0 * INF */
     crxor    cr0_0,cr6_sign,cr7_sign   /* CR bit 0 = fpa.sign ^ fpb.sign */
     mfcr     r0                        /* copy CR so that bit lands in r0 bit 0 */
     rlwimi   r3,r0,0,0,0               /* r3 bit 0 (mask 0x80000000) = r0 bit 0; rest of r3 kept */
     mfctr    r0                        /* r0 = CTR (presumably the CR value saved on entry) */
     mtcr     r0                        /* restore cr */
     blr                                /* return r3 */
/* Invalid operation (reached from a_zero / b_zero for 0 * INF): return the    */
/* default quiet NaN 0x7fc00000.                                               */
rtn_NaN:
     /* addis treats an RA field of 0 as the constant zero; oris does not and  */
     /* would OR the immediate into whatever register r0 currently holds.      */
     addis    r3,0,0x7fc0               /* fpa = QNaN (0x7fc00000); */
     mfctr    r0                        /* r0 = CTR (presumably the CR value saved on entry) */
     mtcr     r0                        /* restore cr */
     blr                                /* return; */

/* fpb == 0 and fpa is neither NaN nor zero (those were dispatched earlier).   */
/* INF * 0 is invalid and yields QNaN; any finite fpa gives a zero whose sign  */
/* is the XOR of the operand signs.                                            */
b_zero:
     bt       cr6_inf,rtn_NaN           /* if (fpa.inf) goto rtn_NaN; */
     crxor    cr0_0,cr6_sign,cr7_sign   /* crbit0 = fpa.sign ^ fpb.sign; */
     mfcr     r0                        /* r0 = cr; result sign is now r0 bit 0 */
     /* Build the result from the sign bit alone: r3 still holds the nonzero   */
     /* fpa here, so inserting the sign into it would return +/-fpa, not zero. */
     rlwinm   r3,r0,0,0x80000000        /* r3 = r0 & 0x80000000;  (+0.0 or -0.0) */
     mfctr    r0                        /* r0 = CTR (presumably the CR value saved on entry) */
     mtcr     r0                        /* restore cr */
     blr                                /* return; */
                                      
/* NaN operands: hand back the NaN operand itself with bit 0x00400000 set.     */
a_NaN:                                  /* fpa is NaN; r3 holds fpa */
     oris     r3,r3,0x0040              /* r3 = fpa | 0x00400000; */
     mfctr    r0                        /* r0 = CTR (presumably the CR value saved on entry) */
     mtcr     r0                        /* restore cr */
     blr                                /* return r3 */

b_NaN:                                  /* fpb is NaN; r4 holds fpb */
     oris     r3,r4,0x0040              /* r3 = fpb | 0x00400000;  (copy and OR in one step) */
     mfctr    r0                        /* r0 = CTR (presumably the CR value saved on entry) */
     mtcr     r0                        /* restore cr */
     blr                                /* return r3 */
                                      
/* At least one operand is infinite and neither is NaN or zero (those cases    */
/* were dispatched earlier): return INF carrying the XOR of the operand signs. */
a_INF:
b_INF:
     crxor    cr0_0,cr6_sign,cr7_sign   /* CR bit 0 = fpa.sign ^ fpb.sign */
     mfcr     r0                        /* r0 bit 0 = sign of result */
     lis      r3,0x7f80                 /* r3 = 0x7f800000 (INF, sign clear) */
     rlwimi   r3,r0,0,0,0               /* r3 bit 0 (mask 0x80000000) = r0 bit 0 */
     mfctr    r0                        /* r0 = CTR (presumably the CR value saved on entry) */
     mtcr     r0                        /* restore cr */
     blr                                /* return r3 */
                 
function_epilog(__mulsf3)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -