⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ppc_fmul.s

📁 powerpc 405 优化过的硬浮点库
💻 S
📖 第 1 页 / 共 3 页
字号:
     rlwinm.   r11,r9,0,1,31       
     cmpwi     cr2,r10,0           
     bne       full                
     bne       cr2,full            
                                   
/*   Zero-result exit.  Entered by fall-through when a is 0 and by the branch     */
/*   from tst0b when b is 0.  Builds a zero whose sign bit is the XOR of the      */
/*   sign bits of r9 and r12 and returns it in r3 (high word) / r4 (low word).    */
/*   NOTE(review): r9/r12 are presumably the high words of a and b, loaded        */
/*   before this point - confirm against the code above this view.                */
/*   Calculate sign:                                                               */
return0:                           
     xor       r3,r9,r12             /* get product sign */
     rlwinm    r3,r3,0,0,0            /* keep only bit 0 (the sign bit) */
     li        r4,0                   /* low word of the result = 0 */
     mtcr      r0                     /* restore cr */
     mfctr     r13                    /* restore r13 */
     blr                              /* return */

/*   Test whether b is zero: every bit of r12 except the sign bit, and all of     */
/*   r6, must be 0.  Anything else goes to the general path at 'full'.            */
/*   NOTE(review): r12/r6 are presumably b's high/low words - confirm.            */
tst0b:
     rlwinm.   r11,r12,0,1,31         /* r11 = r12 without the sign bit; sets cr0 */
     cmpwi     cr2,r6,0               /* cr2 = (r6 compared with 0) */
     bne       full                   /* non-sign bits set in r12 -> general path */
     bne       cr2,full               /* r6 != 0 -> general path */
/*   b is 0:                                                                    */
     b        return0                 /* return a signed zero */

/*---------------- end of normal-only case ---------------                      */
/* General path: build a stack frame and save the registers used below.         */
/* On entry r0 holds the saved CR and CTR holds the saved r13 (see the          */
/* mtcr/mfctr pairs here and in return0).  STACKSIZE and SAVEREG are defined    */
/* outside this view.                                                           */
full:
     mtcr     r0                      /* restore CR */
     
     mflr     r0                      /* save link register in caller's stack */
     stw      r0,4(r1)                /* LR stored at offset 4 from the incoming r1 */
     stwu     r1,-STACKSIZE(r1)      /* set up stack frame to hold saved regs */

     mfctr    r13                     /* restore r13 */

     SAVEREG(28)                      /* save r28 */
     mfcr     r28                     /* save cr */
     mtctr    r13                     /* save r13 in CTR */
     
/* load fpa into r8,r9,r10 and cr6.  load fpb into r11, r12, r13 and cr7 */
/*   r8  = fpa.exp   r9  = fpa.hi   r10 = fpa.lo                         */
/*   r11 = fpb.exp   r12 = fpb.hi   r13 = fpb.lo                         */
/* Inputs are r3:r4 (fpa) and r5:r6 (fpb).  The cr6_xxx, cr7_xxx, cr0_2 and   */
/* cr1_2 names are CR-bit symbols defined outside this view; cr0_2 is used    */
/* here as the "equal" result of the preceding cr0 compare.  The SAVEREG      */
/* calls are interleaved with the classification code.                        */
     mr       r9,r3                   /* load fpa.exp, fpa.S, fpa.hi */
     mr       r12,r5                  /* load fpb.exp, fpb.S, fpb.hi */
     rlwinm   r8,r9,32-20,0x7ff       /* isolate exponent of fpa */
     rlwinm   r11,r12,32-20,0x7ff     /* isolate exponent of fpb */
     mr       r10,r4                  /* load fpa.lo */
     cmpwi    cr6,r9,0                /* set fpa.sign */
     mr       r13,r6                  /* load fpb.lo */
     cmpwi    cr7,r12,0               /* set fpb.sign */
     rlwinm.  r9,r9,0,0x000fffff      /* isolate fpa.hi */
     cror     cr6_zero,cr0_2,cr0_2    /* fpa.zero = fpa.hi == 0 */
     rlwinm.  r12,r12,0,0x000fffff    /* isolate fpb.hi */
     SAVEREG(29)                      /* save r29 */
     cror     cr7_zero,cr0_2,cr0_2    /* fpb.zero = fpb.hi == 0 */
     cmpwi    cr0,r10,0               /* if (fpa.lo == 0) */
     SAVEREG(30)                      /* save r30 */
     crand    cr6_zero,cr6_zero,cr0_2 /* fpa.zero = fpa.hi==0 && fpa.lo==0 */
     cmpwi    cr0,r13,0               /* if (fpb.lo == 0) */
     SAVEREG(31)                      /* save r31 */
     crand    cr7_zero,cr7_zero,cr0_2 /* fpb.zero = fpb.hi==0 && fpb.lo==0 */
     cmpwi    cr0,r8,0x7ff            /* if (fpa.exp == DEXPMAX) */
     crand    cr6_inf,cr6_zero,cr0_2  /* fpa.inf=(fpa.exp==DEXPMAX && significand==0) */
     crandc   cr6_NaN,cr0_2,cr6_zero  /* fpa.NaN=(fpa.exp==DEXPMAX && significand!=0) */
     cmpwi    cr0,r8,0                /* if (fpa.exp == 0) */
     crand    cr6_zero,cr6_zero,cr0_2 /* fpa.zero=(fpa.exp==0 && significand==0) */
     crandc   cr0_2,cr0_2,cr6_zero    /* denormal = (fpa.exp==0 && fpa!=0) */
     cmpwi    cr1,r11,0x7ff           /* if (fpb.exp == DEXPMAX); result used at adone */
     beq      denormal_a              /* if (!denormal) { */
     oris     r9,r9,0x0010            /*    fpa.hi |= 0x00100000;  (implied 1) */
     b        adone                   /* } else { */
denormal_a:
     addi     r8,r8,1                 /*    fpa.exp++; */
adone:                                /* } */
     crand    cr7_inf,cr7_zero,cr1_2  /* fpb.inf=(fpb.exp==DEXPMAX && significand==0) */
     crandc   cr7_NaN,cr1_2,cr7_zero  /* fpb.NaN=(fpb.exp==DEXPMAX && significand!=0) */
     cmpwi    cr0,r11,0               /* if (fpb.exp == 0) */
     crand    cr7_zero,cr7_zero,cr0_2 /* fpb.zero=(fpb.exp==0 && significand==0) */
     crandc   cr0_2,cr0_2,cr7_zero    /* denormal = (fpb.exp==0 && fpb!=0) */
     beq      denormal_b              /* if (!denormal) { */
     oris     r12,r12,0x0010          /*    fpb.hi |= 0x00100000;  (implied 1) */
     b        bdone                   /* } else { */
denormal_b:
     addi     r11,r11,1               /*   fpb.exp++; */
bdone:                                /* } */

/* Dispatch the special operands, in this order: NaN, zero, infinity.           */
/* Zero is tested before infinity, so a zero operand reaches a_zero/b_zero      */
/* even when the other operand is infinite.  All six handlers are outside       */
/* this view.                                                                   */

/* if (fpa == NaN) return QNaN;                                                 */
     bt       cr6_NaN,a_NaN           /* if (fpa == NaN) goto a_NaN; */
     
/* if (fpb == NaN) return QNaN;                                                 */
     bt       cr7_NaN,b_NaN           /* if (fpb == NaN) goto b_NaN; */
     
/* if (fpa == 0) goto a_zero.                                                   */
/* NOTE(review): the original comment said "return fpb", which looks inherited  */
/* from an add routine - confirm what a_zero returns for a multiply.            */
     bt       cr6_zero,a_zero         /* if (fpa == 0) goto a_zero; */
     
/* if (fpb == 0) goto b_zero.                                                   */
/* NOTE(review): the original comment said "return fpa" - confirm as above.     */
     bt       cr7_zero,b_zero         /* if (fpb == 0) goto b_zero; */
     
/* check     for infinities                                                     */
     bt       cr6_inf,a_INF           /* if (fpa.INF) goto a_INF; */
     bt       cr7_inf,b_INF           /* if (fpb.INF) goto b_INF; */
     
/* Significand multiply: four 32x32 partial products are accumulated into the   */
/* multi-word value n[0..3] = r4,r5,r6,r0.  Each 64-bit partial product is      */
/* placed with the same 12-bit left shift relative to its natural word          */
/* position.  The exponent sum and the result sign are computed in between.     */

/* Multiply a-low and b-low (hi-result)                                         */
     mulhwu   r4,r10,r13              /* r4 = high word of fpa.lo * fpb.lo */
     
/* Calculate exponent                                                           */
     add      r8,r8,r11               /* fpa.exp += fpb.exp; */
     addi     r8,r8,-1023             /* fpa.exp -= DEXPBIAS; */
     
/* Calculate sign                                                               */
     crxor    cr6_sign,cr6_sign,cr7_sign /* result sign = fpa.sign ^ fpb.sign */
     
/* low word of the product; done only now because r11 held fpb.exp until the    */
/* add above consumed it                                                        */
     mullw    r11,r10,r13             /* r4,r11 = fpa.lo * fpb.lo; */
     
/* r5,r6,r0 = (r4,r11) spread over three words and shifted right by 20          */
     rlwinm   r5,r4,32-20,0x00000fff  /* n[1] = r4 >> 20; */
     rlwinm   r6,r4,12,0xfffff000     /* n[2] = r4 << 12; */
     rlwimi   r6,r11,32-20,0x00000fff /* n[2] += ((r11>>20)&0xfff); */
     rlwinm   r0,r11,12,0xfffff000    /* n[3] = r11 << 12; */
     
/* Multiply a-high and b-low                                                    */
     mulhwu   r4,r9,r13               /* r4 = high word (old r4 already used above) */
     mullw    r11,r9,r13              /* r11 = low word */
     
/* r29,r30,r31 = r4,r11 >> 20                                               */
     rlwinm   r29,r4,32-20,0x00000fff /* n[0] = r4 >> 20 */
     rlwinm   r30,r4,12,0xfffff000    /* n[1] = r4 << 12 */
     rlwimi   r30,r11,32-20,0xfff     /* n[1] += (r11>>20)&0xfff */
     rlwinm   r31,r11,12,0xfffff000   /* n[2] = r11 << 12 */
     
/* r4,r5,r6 += r29,r30,r31                                                      */
     addc     r6,r6,r31               /* n[2] += r31, sets carry */
     adde     r5,r5,r30               /* n[1] += r30 + carry */
     addze    r4,r29                  /* n[0] = r29 + carry (n[0] had no value yet) */
     
/* Multiply a-low and b-high                                                    */
     mulhwu   r13,r10,r12             /* r13 = high word (fpb.lo no longer needed) */
     mullw    r11,r10,r12             /* r11 = low word */
     
/* r29,r30,r31 = r13,r11 >> 20                                          */
     rlwinm   r29,r13,32-20,0xfff     /* n[0] = r13 >> 20 */
     rlwinm   r30,r13,12,0xfffff000   /* n[1] = r13 << 12 */
     rlwimi   r30,r11,32-20,0xfff     /* n[1] += (r11>>20)&0xfff */
     rlwinm   r31,r11,12,0xfffff000   /* n[2] = r11 << 12 */
     
/* r4,r5,r6 += r29,r30,r31                                                      */
     addc     r6,r6,r31               /* n[2] += r31, sets carry */
     adde     r5,r5,r30               /* n[1] += r30 + carry */
     adde     r4,r4,r29               /* n[0] += r29 + carry */
     
/* Multiply a-high and b-high                                                   */
     mulhwu   r13,r9,r12              /* r13 = high word */
     mullw    r11,r9,r12              /* r11 = low word */
     
/* r29,r30 = r13,r11 << 12                                              */
     rlwinm   r29,r13,12,0xfffff000   /* n[0] = r13 <<     12 */
     rlwimi   r29,r11,32-20,0xfff     /* n[0] += ((r11>>20)&0xfff) */
     rlwinm   r30,r11,12,0xfffff000   /* n[1] = r11 << 12 */
     
/* r4,r5 += r29,r30                                                             */
     addc     r5,r5,r30               /* n[1] += r30, sets carry */
     adde     r4,r4,r29               /* n[0] += r29 + carry */
     
/* put results from r4,r5,r6,r0 into r9 and r10                                 */
/* s = number of leading zero bits in n[0].  s == 11 means the significand is   */
/* already aligned; s < 11 means it must be shifted right by 11-s.  s > 11      */
/* branches to noshrght, which is outside this view, as is noshift.  r0 ends    */
/* up holding the round word, with bit 0x1 set as a sticky flag when non-zero   */
/* bits were dropped.                                                           */
     cntlzw   r13,r4                  /* s = cntlz(n[0]); */
     cmpwi    cr0,r13,11              /* if (s == 11) */
     RESTREG(29)                      /* restore r29 */
     RESTREG(30)                      /* restore r30 */
     bne      notjust                 /* { */
     cmpwi    cr0,r0,0                /*   sticky bit? (n[3] != 0) */
     mr       r9,r4                   /*   fpa.hi = n[0]; */
     mr       r10,r5                  /*   fpa.lo = n[1]; */
     mr       r0,r6                   /*   round = n[2]; */
     beq      nosticky                /*   if (sticky) */
     ori      r0,r0,0x0001            /*     round |= 0x00000001; */
nosticky:
     b noshift                        /* } */
notjust:
     bgt      noshrght                /* else if (s < 11); still tests the s vs 11 compare */
                                      /* { */
     subfic   r13,r13,11              /*   r13     = 11-s; */
     srw      r10,r5,r13              /*   fpa.lo = n[1] >> (11-s); */
     subfic   r11,r13,32              /*   r11     = 21+s; */
     slw      r7,r6,r11               /*   sticky = shifted out bits */
     or.      r0,r0,r7                /*            + lo-word; sets cr0 for the beq below */
     srw      r0,r6,r13               /*   round = n[2] >> (11-s); */
     slw      r6,r5,r11               /*   temp = n[1]     << (21+s); */
     or       r0,r0,r6                /*   round |= temp; */
     beq      nosticky2               /*   if (sticky != 0) */
     ori      r0,r0,0x0001            /*   add in sticky bit */
nosticky2:
     slw      r6,r4,r11               /*   temp = n[0]     << (21+s); */
     or       r10,r10,r6              /*   fpa.lo |= temp; */
     srw      r9,r4,r13               /*   fpa.hi = n[0] >> (11-s); */
     add      r8,r8,r13               /*   fpa.exp += (11-s); */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -