⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ppc_fdivs.s

📁 powerpc 405 优化过的硬浮点库
💻 S
📖 第 1 页 / 共 3 页
字号:
     /* Classify the two single-precision operands.  r9 and r12 still hold   */
     /* the full fpa and fpb words here; r8 is tested below as fpa.exp and   */
     /* is presumably extracted from r9 before this point -- TODO confirm.   */
     /* On exit cr6 / cr7 carry the sign, zero, inf and NaN flags of fpa /   */
     /* fpb, and r9 / r12 hold the 23 fraction bits with bit 23 (the implied */
     /* 1) set -- except for a nonzero denormal, which instead gets its      */
     /* exponent bumped from 0 to 1.  The SAVEREG invocations are simply     */
     /* interleaved with the compare / CR-logic instructions.                */
     rlwinm     r11,r12,9,0xff            /* fpb.exp = (fpb >> 23) & 0xff */
     cmpwi      cr6,r9,0                  /* set fpa.sign (cr6 records fpa < 0) */
     cmpwi      cr7,r12,0                 /* set fpb.sign (cr7 records fpb < 0) */
     rlwinm.    r9,r9,0,0x007fffff        /* isolate fpa.hi; cr0 = fpa.hi vs 0 */
     cror       cr6_zero,cr0_2,cr0_2      /* fpa.zero = (fpa.hi == 0), refined below */
     rlwinm.    r12,r12,0,0x007fffff      /* isolate fpb.hi; cr0 = fpb.hi vs 0 */
     cror       cr7_zero,cr0_2,cr0_2      /* fpb.zero = (fpb.hi == 0), refined below */
     SAVEREG(20)                          /* save r20 */
     cmpwi      cr0,r8,0xff               /* if (fpa.exp == SEXPMAX) */
     SAVEREG(21)                          /* save r21 */
     crand      cr6_inf,cr6_zero,cr0_2    /* fpa.inf=(fpa.exp==SEXPMAX && fpa.hi==0) */
     SAVEREG(22)                          /* save r22 */
     crandc     cr6_NaN,cr0_2,cr6_zero    /* fpa.NaN=(fpa.exp==SEXPMAX && fpa.hi!=0) */
     cmpwi      cr0,r8,0                  /* if (fpa.exp == 0) */
     SAVEREG(23)                          /* save r23 */
     crand      cr6_zero,cr6_zero,cr0_2   /* fpa.zero=(fpa.exp==0 && fpa.hi==0) */
     SAVEREG(24)                          /* save r24 */
     crandc     cr0_2,cr0_2,cr6_zero      /* denormal = (fpa.exp==0 && fpa.hi!=0) */
     cmpwi      cr1,r11,0xff              /* cr1: fpb.exp == SEXPMAX, consumed after adone */
     beq        denormal_a                /* if (!denormal) { */
     oris       r9,r9,0x0080              /*    fpa.hi |= 0x00800000;  add implied 1 */
     b          adone                     /* } else { */
denormal_a:
     addi       r8,r8,1                   /*    fpa.exp++;  denormal: no implied 1 */
adone:                                    /* } */
     crand      cr7_inf,cr7_zero,cr1_2    /* fpb.inf=(fpb.exp==SEXPMAX && fpb.hi==0) */
     SAVEREG(25)                          /* save r25 */
     crandc     cr7_NaN,cr1_2,cr7_zero    /* fpb.NaN=(fpb.exp==SEXPMAX && fpb.hi!=0) */
     cmpwi      cr0,r11,0                 /* if (fpb.exp == 0) */
     SAVEREG(26)                          /* save r26 */
     crand      cr7_zero,cr7_zero,cr0_2   /* fpb.zero=(fpb.exp==0 && fpb.hi==0) */
     SAVEREG(27)                          /* save r27 */
     crandc     cr0_2,cr0_2,cr7_zero      /* denormal = (fpb.exp==0 && fpb.hi!=0) */
     beq        denormal_b                /* if (!denormal) { */
     oris       r12,r12,0x0080            /*    fpb.hi |= 0x00800000;  add implied 1 */
     b          bdone                     /* } else { */
denormal_b:
     addi       r11,r11,1                 /*    fpb.exp++;  denormal: no implied 1 */
bdone:                                    /* } */

     /* Special operands leave the main path first; NaNs are tested before   */
     /* zeros and infinities, and fpa before fpb within each pair.  The      */
     /* target labels are not in this part of the file.                      */
     bt         cr6_NaN,a_NaN             /* if (fpa.NaN) goto a_NaN; */
     bt         cr7_NaN,b_NaN             /* if (fpb.NaN) goto b_NaN; */
     bt         cr6_zero,a_zero           /* if (fpa.zero)     goto a_zero; */
     bt         cr6_inf,a_INF             /* if (fpa.inf) goto a_INF; */
     bt         cr7_zero,b_zero           /* if (fpb.zero)     goto b_zero; */
     bt         cr7_inf,b_INF             /* if (fpb.inf) goto b_INF; */

     /* From here on both operands are finite and nonzero.                   */
     li         r10,0                     /* zero fpa.lo for double precision divide */ 
     
/*   left justify divisor (really only needed for denormal numbers)    */
/*   a divisor whose top set bit is already bit 23 gives s = 8, shift 0 */
     cntlzw     r6,r12                    /* s = cntlz(fpb.hi); */
     addi       r6,r6,-8                  /*   r6 = s-8; */
     subf       r11,r6,r11                /*   fpb.exp -= (s-8); */
     slw        r12,r12,r6                /*   fpb.hi <<= s-8; */
     
/*   right justify     a 16 bit divisor                                       */
/*   r6 is reused: it now holds dr, the divwu divisor used inside the loop   */
     rlwinm     r6,r12,32-8,0x0000ffff    /* dr = (fpb.hi >> 8) & 0xffff; */
     
/*   Calculate sign                                                               */
     crxor      cr6_sign,cr6_sign,cr7_sign /* fpa.sign ^= fpb.sign; */
     
/*   set up for loop                                                              */
     crset      first_loop                /* first_loop = true; */
     li         r0,3                      /* r0 = 3; */
     mtctr      r0                        /* ctr =     3; */
     
/* do_divide: quotient refinement loop, at most three passes (ctr is loaded  */
/* with 3 before entry).  Each pass:                                         */
/*   1. if cntlz(fpa.hi) > 8, shifts the dividend / remainder so that its    */
/*      leading 1 sits at bit 23 of r9, adjusting fpa.exp to match;          */
/*   2. forms an estimate e = (fpa.hi << 8) / dr with divwu and keeps the    */
/*      16 bits selected by the 0x00ffff00 mask;                             */
/*   3. computes t = fpb.hi * e and the remainder r = a - t; while r.hi is   */
/*      negative, e.hi is lowered by 0x100 (the lowest bit the mask keeps)   */
/*      and the multiply is redone from eshdone;                             */
/*   4. adds e into the quotient c and makes r the next dividend.            */
/* The loop also ends early as soon as the remainder is exactly zero.        */
/* Registers: r8/r9/r10 = fpa.exp/hi/lo, r11/r12 = fpb.exp/hi, r6 = dr,      */
/* r26/r27 = e.exp/e.hi, r20 = t.exp (later reused as shift), r21:r22 = t    */
/* and then r, r23/r24/r25 = c.exp/c.hi/c.lo, r0/r4/r5 = scratch.            */
do_divide:                                /* for (nest=0;nest<3;nest++) { */
/*   force dividend into one register                                         */
     cntlzw     r4,r9                     /* s = cntlz(fpa.hi); */
     cmpwi      cr0,r4,8                  /* if (s     > 8) */
     ble        dddone                    /* { */
     cmpwi      cr0,r4,32                 /*   if (s < 32) */
     addi       r4,r4,-8                  /*     r4 = s-8;   (cr0 left untouched) */
     subfic     r5,r4,32                  /*     r5 = 40-s;  (cr0 left untouched) */
     bge        ashgt32                   /*   { */
     subf       r8,r4,r8                  /*     fpa.exp -= (s-8); */
     slw        r9,r9,r4                  /*     fpa.hi <<= s-8; */
     srw        r0,r10,r5                 /*     temp = fpa.lo >> 40-s; */
     or         r9,r9,r0                  /*     fpa.hi |=     temp; */
     b          dddone                    /*   } */
ashgt32:                                  /*   else  (s == 32, i.e. fpa.hi == 0) */
                                          /*   { */   
     addi       r8,r8,-32                 /*     fpa.exp -= 32; */
     cntlzw     r4,r10                    /*     s     = cntlz(fpa.lo); */
     cmpwi      cr0,r4,8                  /*     if (s >= 8) */
     blt        ashgt43                   /*     { */
     addi       r4,r4,-8                  /*       r4 = s-8; */
     subf       r8,r4,r8                  /*       fpa.exp     -= (s-8); */
     slw        r9,r10,r4                 /*       fpa.hi = fpa.lo     << s-8; */
     b          dddone                    /*     } */
ashgt43:                                  /*     else */
                                          /*     { */
     subfic     r4,r4,8                   /*       r4 = 8-s; */
     add        r8,r4,r8                  /*       fpa.exp     += (8-s); */
     srw        r9,r10,r4                 /*       fpa.hi = fpa.lo     >> 8-s; */
                                          /*     } */
                                          /*   } */
dddone:                                   /* } */
/*   left justify dividend                                                        */
     rlwinm     r5,r9,8,0xffffff00        /* dd = fpa.hi << 8; */
/*   estimate quotient (truncate to 16 bits)                                      */
     divwu      r27,r5,r6                 /* e.hi = dd/dr; */
     cntlzw     r4,r27                    /* s = cntlz(e.hi); */
     addi       r26,r8,127+15             /* e.exp     = fpa.exp + SEXPBIAS+15  */
     subf       r26,r11,r26               /* e.exp     -= fpb.exp; */
     subf       r26,r4,r26                /* e.exp     -= s; */
     addi       r4,r4,-8                  /* r4 = s-8; */
     rlwnm      r27,r27,r4,0x00ffff00     /* e.hi = (e.hi << s-8) & 0x00ffff00; */
eshdone:                                  /* re-entered after e.hi is lowered below */
/*   t (temporary) = b (divisor) * e (estimate)                           */
/*   Multiply b and e                                                             */
     mulhwu     r21,r12,r27               /* t.hi,t.lo = fpb.hi * e; */
     mullw      r22,r12,r27               /*   (r21 = high word, r22 = low word) */
     rlwinm     r21,r21,9,0xfffffe00      /* t.hi <<= 9; */
     rlwimi     r21,r22,32-23,0x1ff       /* t.hi += ((t.lo>>23)&0x1ff); */
     rlwinm     r22,r22,9,0xfffffe00      /* t.lo <<= 9; */
     add        r20,r11,r26               /* t.exp     = fpb.exp + e.exp; */
     addi       r20,r20,-127              /* t.exp     -= (SEXPBIAS); */

/*   r (remainder) = a (dividend) - t (temporary)                             */
     cmpw       cr0,r20,r8                /* if (t.exp < fpa.exp) */
     subf       r4,r20,r8                 /*   shift = fpa.exp - t.exp; */
     subfic     r5,r4,32                  /*   r5 = 32-shift; */
     bge+       tnoshift                  /* { */
     srw        r22,r22,r4                /*   t.lo >>= shift; */
     slw        r0,r21,r5                 /*   temp = t.hi     << (32-shift); */
     or         r22,r22,r0                /*   t.lo |= temp; */
     srw        r21,r21,r4                /*   t.hi >>= shift; */
tnoshift:                                 /* } */
     subfic     r22,r22,0                 /* t = -t;  (low word, sets carry) */
     subfze     r21,r21                   /*          (high word, uses carry) */
/* NOTE(review): only fpa.hi is added back; fpa.lo (r10) is not added into   */
/* r.lo.  Presumably the dividend is expected to live entirely in r9 at this */
/* point -- confirm.                                                         */
     add.       r21,r21,r9                /* r.hi = fpa.hi + t.hi; */
     bge        addinq                    /* if (r.hi >= 0) goto addinq; */

/* remainder is neg; estimate too high. decrement and retry.                    */
     addi       r27,r27,-256              /* e.hi -= 0x100; */
     b          eshdone                   /* goto eshdone; */

addinq:
/* c (quotient) += e (estimate)                                                 */
     bf+        first_loop,notfirst    /* if (first_loop) { */
     crclr      first_loop                /*   first_loop = false; */
     cntlzw     r23,r27                   /*   r23 = cntlz(e.hi); */
     addi       r25,r23,-8                /*   r25 = r23 -     8; */
     subf       r23,r25,r26               /*   c.exp = e.exp - r25; */
     slw        r24,r27,r25               /*   c.hi = e.hi     << r25; */
     li         r25,0                     /*   c.lo = 0; */
     b          adddone                   /* } */
notfirst:                                 /* else */
                                          /* { */
     subf       r20,r26,r23               /*   shift = c.exp - e.exp; */
     cmpwi      cr0,r20,32                /*   if (shift <     32) */
     subfic     r5,r20,32                 /*     r5 = 32-shift; */
     bge        shgt32                    /*   { */
     slw        r4,r27,r5                 /*     e.lo = e.hi << (32-shift); */
     srw        r27,r27,r20               /*     e.hi >>= shift; */
     b          addit                     /*   } */
shgt32:                                   /*   else */
                                          /*   { */
     addi       r20,r20,-32               /*     shift -= 32; */
     srw        r4,r27,r20                /*     e.lo = e.hi >> shift; */
     li         r27,0                     /*     e.hi = 0; */
addit:                                    /*   } */
/* NOTE(review): two plain adds, so a carry out of c.lo is not propagated    */
/* into c.hi.  Presumably successive estimates never overlap enough to       */
/* carry -- verify.                                                          */
     add        r25,r25,r4                /*   c.lo += e.lo; */
     add        r24,r24,r27               /*   c.hi += e.hi; */
adddone:                                  /* } */
     or.        r0,r21,r22                /* if (r.hi == 0     && r.lo     ==0) */
     mr         r9,r21                    /* a.hi = r.hi; */
     mr         r10,r22                   /* a.lo = r.lo; */
     beq        divdone                   /*   break; */
     bdnz       do_divide                 /* } end for */
/* divdone: reached when the remainder became zero or after the last pass.   */
/* The quotient is copied into the fpa registers before r23 is restored, and */
/* r24 / r25 are restored only further down, after these copies.             */
divdone:

/* put results from r23,r24,r25 into r8,r9 and r10                                 */
     mr         r8,r23                    /* fpa.exp = c.exp; */
     mr         r9,r24                    /* fpa.hi = c.hi; */
     mr         r10,r25                   /* fpa.lo = c.lo; */

     RESTREG(20)                          /* restore r20 */
     RESTREG(21)                          /* restore r21 */
     RESTREG(22)                          /* restore r22 */
     RESTREG(23)                          /* restore r23 */
/* Normalize results                                                            */
     cntlzw     r5,r9                     /* s = cntlz(fpa.hi); */
     cmpwi      cr0,r5,8                  /* if (s < 8) */
     RESTREG(24)                          /* restore r24 */
     RESTREG(25)                          /* restore r25 */
     bge     noshrght                     /* { */
     subfic     r5,r5,8                   /*   r5 = 8-s; */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -