⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ppc_fdiv.s

📁 powerpc 405 优化过的硬浮点库
💻 S
📖 第 1 页 / 共 3 页
字号:
        /* Classify both operands and make the implied leading 1 explicit.    */
        /* Register roles as used in this section:                            */
        /*   r9:r10 = fpa.hi:fpa.lo, r8  = fpa.exp                            */
        /*   r12:r6 = fpb.hi:fpb.lo, r11 = fpb.exp                            */
        /* cr0_2 is used as the "equal / result is zero" bit of cr0.          */
        /* SAVEREG is a macro defined outside this excerpt; the register      */
        /* saves are interleaved with the arithmetic.                         */
        cmpwi   cr7,r12,0               /* set fpb.sign (r12 is not masked until below) */
        rlwinm. r9,r9,0,0x000fffff      /* isolate fpa.hi (low 20 bits); sets cr0 */
        SAVEREG(17)                     /* save r17 */
        cror    cr6_zero,cr0_2,cr0_2    /* fpa.zero = fpa.hi == 0 */
        rlwinm. r12,r12,0,0x000fffff    /* isolate fpb.hi (low 20 bits); sets cr0 */
        SAVEREG(18)                     /* save r18 */
        cror    cr7_zero,cr0_2,cr0_2    /* fpb.zero = fpb.hi == 0 */
        cmpwi   cr0,r10,0               /* if (fpa.lo == 0) */
        SAVEREG(19)                     /* save r19 */
        crand   cr6_zero,cr6_zero,cr0_2 /* fpa.zero = fpa.hi==0 && fpa.lo==0 */
        cmpwi   cr0,r6,0                /* if (fpb.lo == 0) */
        SAVEREG(20)                     /* save r20 */
        crand   cr7_zero,cr7_zero,cr0_2 /* fpb.zero = fpb.hi==0 && fpb.lo==0 */
        cmpwi   cr2,r8,0x7ff            /* cr2 = (fpa.exp == DEXPMAX), tested at the bt below */
        SAVEREG(21)                     /* save r21 */
        SAVEREG(22)                     /* save r22 */
        cmpwi   cr0,r8,0                /* if (fpa.exp == 0) */
        SAVEREG(23)                     /* save r23 */
        crand   cr6_zero,cr6_zero,cr0_2 /* fpa.zero=(fpa.exp==0 && fpa==0) */
        SAVEREG(24)                     /* save r24 */
        crandc  cr0_2,cr0_2,cr6_zero    /* denormal = (fpa.exp==0 && fpa!=0) */
        cmpwi   cr3,r11,0x7ff           /* cr3 = (fpb.exp == DEXPMAX), tested at the bt below */
        /* NOTE(review): the '+' hint marks the denormal branch as            */
        /* predicted-taken; confirm that is the intended prediction.          */
        beq+    denormal_a              /* if (!denormal) {  add implied 1 to significand */
        oris    r9,r9,0x0010            /*    fpa.hi |= 0x00100000; */
        b       adone                   /* } else { */
denormal_a:
        addi    r8,r8,1                 /*    fpa.exp++; */
adone:                                  /* } */
        SAVEREG(25)                     /* save r25 */
        cmpwi   cr0,r11,0               /* if (fpb.exp == 0) */
        SAVEREG(26)                     /* save r26 */
        crand   cr7_zero,cr7_zero,cr0_2 /* fpb.zero=(fpb.exp==0 && fpb==0) */
        SAVEREG(27)                     /* save r27 */
        crandc  cr0_2,cr0_2,cr7_zero    /* denormal = (fpb.exp==0 && fpb!=0) */
        SAVEREG(28)                     /* save r28 */
        beq+    denormal_b        /* if (!denormal) {  add implied 1 to significand */
        oris    r12,r12,0x0010          /*    fpb.hi |= 0x00100000; */
        b       bdone                   /* } else { */
denormal_b:
        addi    r11,r11,1               /*   fpb.exp++; */
bdone:                                  /* } */

        /* Special cases are dispatched only after both operands are unpacked. */
        bt      cr2_2,a_INForNaN        /* if (fpa.exp == 0x7ff) goto a_INForNaN; */
        bt      cr3_2,b_INForNaN        /* if (fpb.exp == 0x7ff) goto b_INForNaN; */
        bt      cr6_zero,a_zero         /* if (fpa.zero) goto a_zero; */
        bt      cr7_zero,b_zero         /* if (fpb.zero) goto b_zero; */
                                        
                                        
        SAVEREG(29)                     /* save r29 */
/* Force the divisor to be normalized: shift fpb.hi:fpb.lo (r12:r6) until the   */
/* leading 1 sits at bit 20 of fpb.hi (cntlz == 11), adjusting fpb.exp (r11)    */
/* by the shift distance.  r7 holds the shift count, r5 its 32-complement.      */
        cntlzw  r7,r12                  /* s = cntlz(fpb.hi); */
        cmpwi   cr0,r7,11               /* if (s > 11) */
        SAVEREG(30)                     /* save r30 */
        SAVEREG(31)                     /* save r31 */
        ble     drdone                  /* {   (uses cr0 from the cmpwi above the SAVEREGs) */
        cmpwi   cr0,r7,32               /*   if (s < 32) */
        addi    r7,r7,-11               /*     r7 = s-11; */
        subfic  r5,r7,32                /*     r5 = 43-s; */
        bge     bshgt32                 /*   { */
        subf    r11,r7,r11              /*     fpb.exp -= (s-11); */
        slw     r12,r12,r7              /*     fpb.hi <<= s-11; */
        srw     r0,r6,r5                /*     temp = fpb.lo >> 43-s; */
        or      r12,r12,r0              /*     fpb.hi |= temp; */
        slw     r6,r6,r7                /*     fpb.lo <<= s-11; */
        b       drdone                  /*   } */
bshgt32:                                /*   else  (fpb.hi == 0) */
                                        /*   { */
        addi    r11,r11,-32             /*     fpb.exp -= 32; */
        cntlzw  r7,r6                   /*     s = cntlz(fpb.lo); */
        cmpwi   cr0,r7,11               /*     if (s >= 11) */
        blt     bshgt43                 /*     { */
        addi    r7,r7,-11               /*       r7 = s-11; */
        subf    r11,r7,r11              /*       fpb.exp -= (s-11); */
        slw     r12,r6,r7               /*       fpb.hi = fpb.lo << s-11; */
        addi    r6,0,0                  /*       fpb.lo = 0; */
        b       drdone                  /*     } */
bshgt43:                                /*     else */
                                        /*     { */
        subfic  r7,r7,11                /*       r7 = 11-s; */
        add     r11,r7,r11              /*       fpb.exp += (11-s); */
        srw     r12,r6,r7               /*       fpb.hi = fpb.lo >> 11-s; */
        subfic  r5,r7,32                /*       r5 = 21+s; */
        slw     r6,r6,r5                /*       fpb.lo <<= (21+s); */
                                        /*     } */
                                        /*   } */
drdone:                                 /* } */
/* Right justify a 16 bit divisor: with the leading 1 of fpb.hi at bit 20,      */
/* (fpb.hi >> 5) & 0xffff keeps the top 16 significant bits.                    */
        rlwinm  r7,r12,32-5,0x0000ffff  /* dr = (fpb.hi >> 5) & 0xffff; */
/* Calculate sign                                                               */
        crxor   cr6_sign,cr6_sign,cr7_sign /* cr6_sign ^= cr7_sign; */
/* set up for loop                                                              */
        crset   first_loop              /* first_loop = true; */
        addi    r0,0,4                  /* r0 = 4; */
        mtctr   r0                      /* ctr = 4; */
        addi    r31,0,0                 /* t.save = 0; */
/* NOTE(review): ctr is loaded with 4 above, while the comment on do_divide     */
/* describes 5 passes; the loop-closing branch is outside this excerpt, so      */
/* confirm the intended iteration count there.                                  */
do_divide:                              /* for (nest=0;nest<5;nest++) { */
/* Force the dividend to be normalized: shift the 96-bit value                  */
/* fpa.hi:fpa.lo:t.save (r9:r10:r31) until the leading 1 is at bit 20 of        */
/* fpa.hi, adjusting fpa.exp (r8).  r28 = shift count, r5 = its 32-complement.  */
        cntlzw  r28,r9                  /* s = cntlz(fpa.hi); */
        cmpwi   cr0,r28,11              /* if (s > 11) */
        ble     dddone                  /* { */
        cmpwi   cr0,r28,32              /*   if (s < 32) */
        addi    r28,r28,-11             /*     r28 = s-11; */
        subfic  r5,r28,32               /*     r5 = 43-s; */
        bge     ashgt32                 /*   { */
        subf    r8,r28,r8               /*     fpa.exp -= (s-11); */
        slw     r9,r9,r28               /*     fpa.hi <<= s-11; */
        srw     r0,r10,r5               /*     temp = fpa.lo >> 43-s; */
        or      r9,r9,r0                /*     fpa.hi |= temp; */
        slw     r10,r10,r28             /*     fpa.lo <<= s-11; */
        srw     r0,r31,r5               /*     temp = t.save >> 43-s; */
        or      r10,r10,r0              /*     fpa.lo |= temp; */
        b       dddone                  /*   } */
ashgt32:                                /*   else  (fpa.hi == 0) */
                                        /*   { */
        addi    r8,r8,-32               /*     fpa.exp -= 32; */
        cntlzw  r28,r10                 /*     s = cntlz(fpa.lo); */
        cmpwi   cr0,r28,11              /*     if (s >= 11) */
        blt     ashgt43                 /*     { */
        addi    r28,r28,-11             /*       r28 = s-11; */
        subf    r8,r28,r8               /*       fpa.exp -= (s-11); */
        slw     r9,r10,r28              /*       fpa.hi = fpa.lo << s-11; */
        subfic  r5,r28,32               /*       r5 = 43-s; */
        srw     r0,r31,r5               /*       temp = t.save >> 43-s; */
        or      r9,r9,r0                /*       fpa.hi |= temp; */
        slw     r10,r31,r28             /*       fpa.lo = t.save << s-11; */
        b       dddone                  /*     } */
ashgt43:                                /*     else */
                                        /*     { */
        subfic  r28,r28,11              /*       r28 = 11-s; */
        add     r8,r28,r8               /*       fpa.exp += (11-s); */
        srw     r9,r10,r28              /*       fpa.hi = fpa.lo >> 11-s; */
        subfic  r5,r28,32               /*       r5 = 21+s; */
        slw     r10,r10,r5              /*       fpa.lo <<= (21+s); */
        srw     r0,r31,r28              /*       temp = t.save >> 11-s; */
        or      r10,r10,r0              /*       fpa.lo |= temp; */
                                        /*     } */
                                        /*   } */
dddone:                                 /* } */
/* Left justify dividend: dd = top 32 significant bits of fpa.hi:fpa.lo         */
        rlwinm  r5,r9,11,0xfffff800     /* dd = fpa.hi << 11; */
        rlwimi  r5,r10,11,0x000007ff    /* dd |= fpa.lo >> 21; */
/* Estimate quotient (truncate to 16 bits): 32-bit dd divided by 16-bit dr,     */
/* then repositioned so its leading 1 is at bit 20; the 0x001fffe0 mask keeps   */
/* bits 20..5, i.e. 16 significant bits.                                        */
        divwu   r27,r5,r7               /* e.hi = dd/dr; */
        cntlzw  r28,r27                 /* s = cntlz(e.hi); */
        addi    r26,r8,1023+15          /* e.exp = fpa.exp + DEXPBIAS+15; */
        subf    r26,r11,r26             /* e.exp -= fpb.exp; */
        subf    r26,r28,r26             /* e.exp -= s; */
        addi    r28,r28,-11             /* r28 = s-11; */
        rlwnm   r27,r27,r28,0x001fffe0  /* e.hi = (e.hi << s-11) & 0x001fffe0; */
/* Form t = b * e as a 96-bit value r21:r22:r31 (t.hi:t.lo:t.save), align it    */
/* to the dividend's exponent and negate it, so that the code following this    */
/* section can form the remainder a - t by addition.  This label is also the    */
/* re-entry point used after the estimate e.hi (r27) has been reduced.          */
eshdone:
/* t (temporary) = b (divisor) * e (estimate)                                   */
/* Multiply b-low and e                                                         */
        mulhwu  r24,r6,r27              /* r24,r25 = fpb.lo * e; (high word) */
        mullw   r25,r6,r27              /*                       (low word)  */
/* r29,r30,r31 = r24,r25 >> 20  (64-bit product placed in a 96-bit field)       */
        rlwinm  r29,r24,32-20,0xfff     /* r29 = r24 >> 20; */
        rlwinm  r30,r24,12,0xfffff000   /* r30 = r24 << 12; */
        rlwimi  r30,r25,32-20,0xfff     /* r30 += (r25>>20)&0xfff; */
        rlwinm  r31,r25,12,0xfffff000   /* r31 = r25 << 12; */
/* Multiply b-high and e                                                        */
        mulhwu  r24,r12,r27             /* r24,r25 = fpb.hi * e; (high word) */
        mullw   r25,r12,r27             /*                       (low word)  */
/* r21,r22 = r24,r25 << 12                                                      */
        rlwinm  r21,r24,12,0xfffff000   /* t.hi = r24 << 12; */
        rlwimi  r21,r25,32-20,0xfff     /* t.hi += ((r25>>20)&0xfff); */
        rlwinm  r22,r25,12,0xfffff000   /* t.lo = r25 << 12; */
/* r21,r22 += r29,r30  (r31 already holds the lowest word, t.save)              */
        addc    r22,r22,r30             /* t.lo += r30; */
        adde    r21,r21,r29             /* t.hi += r29 + (CA); */
        add     r20,r11,r26             /* t.exp = fpb.exp + e.exp; */
        addi    r20,r20,-1023           /* t.exp -= DEXPBIAS; */

/* r (remainder) = a (dividend) - t (temporary)                                 */
/* First bring t to the dividend's exponent by shifting it right.               */
/* NOTE(review): the word-wise shifts below form a correct 96-bit shift only    */
/* for 0 < shift < 32; presumably guaranteed by how e.exp is derived - confirm. */
        cmpw    cr0,r20,r8              /* if (t.exp < fpa.exp) */
        subf    r28,r20,r8              /*   shift = fpa.exp - t.exp; */
        subfic  r5,r28,32               /*   r5 = 32-shift; */
        bge+    tnoshift                /* { */
        srw     r31,r31,r28             /*   t.save >>= shift; */
        slw     r0,r22,r5               /*   temp = t.lo << (32-shift); */
        or      r31,r31,r0              /*   t.save |= temp; */
        srw     r22,r22,r28             /*   t.lo >>= shift; */
        slw     r0,r21,r5               /*   temp = t.hi << (32-shift); */
        or      r22,r22,r0              /*   t.lo |= temp; */
        srw     r21,r21,r28             /*   t.hi >>= shift; */
tnoshift:                               /* } */
/* 96-bit two's-complement negate; the carry (CA) ripples low word to high.     */
        subfic  r31,r31,0               /* t = -t;  t.save = 0 - t.save, sets CA */
        subfze  r22,r22                 /*          t.lo   = ~t.lo + CA */
        subfze  r21,r21                 /*          t.hi   = ~t.hi + CA */
/* Add the (already negated) t to the dividend to obtain the remainder          */
/* r24:r25 (r.hi:r.lo).  A negative remainder means the estimate was too        */
/* large: e.hi is reduced by 32 (one unit of its lowest kept bit, bit 5) and    */
/* control returns to eshdone.  Otherwise the estimate is accepted at addinq.   */
/* NOTE(review): the first adde consumes whatever CA is live on entry to        */
/* do_rem (on the fall-through path, the carry out of the preceding subfze);    */
/* confirm that this is intended, since other entries to do_rem, if any, are    */
/* outside this excerpt.                                                        */
do_rem:
        adde    r25,r22,r10             /* r.lo = fpa.lo + t.lo + (CA); */
        adde.   r24,r21,r9              /* r.hi = fpa.hi + t.hi + (CA); sets cr0 */
        subf    r20,r26,r17             /*   shift = c.exp - e.exp; */
        bge+    addinq                  /* if (r.hi < 0) {   (branch skips this when r.hi >= 0) */
        addi    r27,r27,-32             /*   e.hi -= 32; */
        b       eshdone                 /*   retry with the smaller estimate */
addinq:                                 /* } */
/* c (quotient) += e (estimate)                                                 */
/* The target label 'first' lies outside this excerpt; the code below is the    */
/* path taken when first_loop is false.  It aligns e to the quotient by         */
/* shifting it right by 'shift' (r20).                                          */
        bt      first_loop,first        /* if (!first_loop) {   (else goto first) */
        cmpwi   cr0,r20,32              /*   if (shift < 32) */
        subfic  r21,r20,32              /*     r21 = 32-shift; */
        addi    r0,0,0                  /*     round = 0; */
        bge     shgt32                  /*   { */
        slw     r28,r27,r21             /*     e.lo = e.hi << (32-shift); */
        srw     r27,r27,r20             /*     e.hi >>= shift; */
        b       addit                   /*   } */
shgt32:                                 /*   else */
                                        /*   { */
        cmpwi   cr0,r20,64              /*     if (shift >= 64)   (consumer of cr0 is past this excerpt) */
        addi    r20,r20,-32             /*     shift -= 32; */
        subfic  r21,r20,32              /*     r21 = 32-shift; */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -