📄 alt_exception_muldiv.s

📁 nios中自定义指令集实现三角函数的软件部分。
💻 S
📖 第 1 页 / 共 2 页
字号:
上一页 12
        bge   r3, zero, 0f
        sub   r3, zero, r3     /* -r3 */
0:
        bge   r6, zero, 0f
        sub   r6, zero, r6     /* -r6 */
0:


.Lunsigned_division:
        /*
         * Shift-and-subtract unsigned division, producing one quotient bit
         * per loop iteration.
         *
         * Register roles (r3, r4, r6, r7, r14 and r17 are prepared before
         * this label is reached):
         *   r3  = dividend on entry.  Its bits leave through the MSB while
         *         quotient bits enter through bit 0, so it holds the quotient
         *         once the loop finishes.
         *   r4  = OPX: 0x25 for div, 0x24 for divu (not touched here)
         *   r6  = divisor
         *   r7  = byte offset, relative to sp, of the slot that receives the
         *         result (the original notes describe it as 4*(C^16));
         *         consumed at .Lstore_result
         *   r13 = running remainder
         *   r14 = loop counter (already initialized to 32)
         *   r15 = scratch: the bit moved from r3 into r13
         *   r17 = MSB holds the sign of the quotient (0 when unsigned)
         *
         * NOTE(review): nothing in this block tests r6 for zero.
         */
        mov   r13, zero             /* remainder = 0 */

.Ldivide_loop:
        /*
         * Treat r13:r3 as one 64-bit value and shift it left by one:
         * the MSB of r3 becomes bit 0 of r13, and bit 0 of r3 is cleared,
         * ready to receive the next quotient bit.
         */
        srli  r15, r3, 31           /* r15 = MSB of r3 (0 or 1) */
        slli  r13, r13, 1
        slli  r3, r3, 1
        or    r13, r13, r15

        /*
         * Unsigned compare: when the remainder is still smaller than the
         * divisor, this quotient bit stays 0 and nothing is subtracted.
         */
        bltu  r13, r6, .Ldiv_skip
        sub   r13, r13, r6          /* remainder -= divisor */
        ori   r3, r3, 1             /* quotient bit = 1 */
.Ldiv_skip:

        /* Count down and repeat until every bit has been processed. */
        addi  r14, r14, -1
        bne   r14, zero, .Ldivide_loop

        /*
         * r3 now holds the magnitude of the quotient.  Hand it over in r9,
         * negated when the MSB of r17 is set.  For an unsigned divide r17
         * was initialized to 0, so the quotient is passed through unchanged.
         */
        mov   r9, r3
        bge   r17, zero, .Lstore_result
        sub   r9, zero, r3          /* r9 = -quotient */
        br    .Lstore_result




        /* MULTIPLICATION
        *
        * A "product" is the number that one gets by summing a "multiplicand"
        * several times.  The "multiplier" specifies the number of copies of the
        * multiplicand that are summed.
        *
        * Actual multiplication algorithms don't use repeated addition, however.
        * Shift-and-add algorithms get the same answer as repeated addition, and
        * they are faster.  To compute the lower half of a product (pppp below)
        * one shifts the product left before adding in each of the partial products
        * (a * mmmm) through (d * mmmm).
        *
        * To compute the upper half of a product (PPPP below), one adds in the
        * partial products (d * mmmm) through (a * mmmm), each time following the
        * add by a right shift of the product.
        *
        *     mmmm
        *   * abcd
        *   ------
        *     ####  = d * mmmm
        *    ####   = c * mmmm
        *   ####    = b * mmmm
        *  ####     = a * mmmm
        * --------
        * PPPPpppp
        *
        * The example above shows 4 partial products.  Computing actual Nios II
        * products requires 32 partials.
        *
        * It is possible to compute the result of mulxsu from the result of mulxuu
        * because the only difference between the results of these two opcodes is
        * the value of the partial product associated with the sign bit of rA.
        *
        *   mulxsu = mulxuu - ((rA < 0) ? rB : 0);
        *
        * It is possible to compute the result of mulxss from the result of mulxsu
        * because the only difference between the results of these two opcodes is
        * the value of the partial product associated with the sign bit of rB.
        *
        *   mulxss = mulxsu - ((rB < 0) ? rA : 0);
        *
        */

.Lmul_immed:
        /* Opcode is muli.  Change it into mul for remainder of algorithm. */
        /*
         * Rewrites the decoded operands so that the register-form multiply
         * code at .Lmultiply, which this block falls straight into, can be
         * reused unchanged.
         *
         * The three instructions must stay in this order: r6 is copied to r7
         * before r6 is overwritten, and r4 is copied to r6 before r4 is
         * overwritten with the OPX value.
         *
         * NOTE(review): this presumes the decode code ahead of this label
         * left the IMM16 value in r4 and the field-B-derived destination
         * offset in r6 -- that code is not visible here; confirm.
         */
        mov   r7, r6         /* Field B is dest register, not field C. */
        mov   r6, r4         /* Field IMM16 is src2, not field B. */
        movi  r4, 0x27       /* OPX of mul is 0x27 */

.Lmultiply:
        /*
         * Software 32 x 32 multiply.  A single pass over the 32 multiplier
         * bits builds both words of the product:
         *   r9  = low word  (the result of mul)
         *   r10 = high word (the result of mulxuu, from which the signed
         *         variants are derived afterwards)
         *
         * Register roles:
         *   r3  = multiplicand
         *   r4  = OPX, selects which result is stored
         *   r6  = multiplier, consumed MSB-first for the low word
         *   r7  = 4 * dest_register (used later as offset to sp)
         *   r11 = unmodified copy of the multiplier (for mulxsu / mulxss)
         *   r12 = multiplier, consumed LSB-first for the high word
         *   r14 = loop counter (already initialized)
         *   r15 = temp
         *   r16 = 1, the rotate count that turns "ror" into a rotate-by-one
         */
        mov   r12, r6               /* high-word multiplier (shifted right) */
        mov   r11, r6               /* pristine multiplier                  */
        mov   r10, zero             /* high word = 0                        */
        mov   r9, zero              /* low word  = 0                        */
        movi  r16, 1

.Lmultiply_loop:
        /*
         * Low word, most significant multiplier bit first:
         *     low <<= 1;
         *     if (multiplier MSB set) low += multiplicand;
         *     multiplier <<= 1;
         */
        slli  r9, r9, 1
        bge   r6, zero, .Lmul_skip
        add   r9, r9, r3
.Lmul_skip:
        slli  r6, r6, 1

        /*
         * High word, least significant multiplier bit first:
         *     if (multiplier LSB set) high += multiplicand, keeping the carry;
         *     high = (carry << 31) | (high >> 1);
         *     multiplier >>= 1;
         *
         * r15 does double duty.  When the LSB is 0 the add is skipped and
         * r15 is left at 0, which is exactly "no carry".  Otherwise it is
         * overwritten with the carry out of the add, moved to bit 31.
         */
        andi  r15, r12, 1
        srli  r12, r12, 1
        beq   r15, zero, .Lmulx_skip
        add   r10, r10, r3
        cmpltu r15, r10, r3         /* sum < addend (unsigned) => it wrapped */
        ror   r15, r15, r16         /* 1 -> 0x80000000, 0 -> 0x00000000      */
.Lmulx_skip:
        srli  r10, r10, 1           /* logical shift                         */
        or    r10, r10, r15         /* bring the saved carry in at the top   */

        /* One multiplier bit handled; loop until the counter hits zero. */
        addi  r14, r14, -1
        bne   r14, zero, .Lmultiply_loop


        /*
         * Loop finished:
         *   r9 = low word, r10 = unsigned high word,
         *   r3 = multiplicand (rA), r11 = original multiplier (rB).
         * Pick or derive the requested result according to OPX in r4.
         */

        /* OPX 0x27 (mul): the low word already sits in r9. */
        xori  r15, r4, 0x27
        beq   r15, zero, .Lstore_result

        /* All remaining opcodes return a high word; begin with the unsigned one. */
        mov   r9, r10

        /* OPX 0x07 (mulxuu): done. */
        xori  r15, r4, 0x07
        beq   r15, zero, .Lstore_result

        /* Signed rA:  mulxsu = mulxuu - ((rA < 0) ? rB : 0) */
        bge   r3, zero, .Lmulxsu_skip
        sub   r9, r9, r11
.Lmulxsu_skip:

        /* OPX 0x17 (mulxsu): done. */
        xori  r15, r4, 0x17
        beq   r15, zero, .Lstore_result

        /* Signed rB as well:  mulxss = mulxsu - ((rB < 0) ? rA : 0) */
        bge   r11, zero, .Lmulxss_skip
        sub   r9, r9, r3
.Lmulxss_skip:
        /* Any OPX reaching this point is treated as mulxss; fall through to the store. */


.Lstore_result:
        /*
         * Common exit for the emulated multiply and divide instructions.
         *
         * r9 holds the result and r7 the byte offset, relative to sp, of the
         * slot that receives it.  The store must come before the reloads
         * below, because the slot it writes may be one of the slots that are
         * read back.
         */
        add   r7, sp, r7            /* r7 = address of the destination slot */
        stw   r9, 0(r7)

        /*
         * Reload registers from the 60-byte frame at sp.  Slots that are
         * deliberately NOT reloaded:
         *   32  bt - breakpoint register usually isn't an operand
         *   36  et - don't corrupt et
         *   40  gp - don't corrupt gp
         *   44  sp - don't corrupt sp
         *   52  ea - don't corrupt ea
         *   56  ba - breakpoint register usually isn't an operand
         */
        ldw   fp, 48(sp)

        ldw   r16, 0(sp)
        ldw   r17, 4(sp)
        ldw   r18, 8(sp)
        ldw   r19, 12(sp)
        ldw   r20, 16(sp)
        ldw   r21, 20(sp)
        ldw   r22, 24(sp)
        ldw   r23, 28(sp)

        /* Release the frame and continue at the shared exception exit. */
        addi  sp, sp, 60
        br    .Lexception_exit


.Lnot_muldiv:
        /*
         * Exit path that stores no result: it only releases the 60-byte
         * frame (the same adjustment .Lstore_result makes) and reloads no
         * registers.
         *
         * NOTE(review): presumably reached when the decoded instruction is
         * not one of the emulated multiply/divide opcodes; the branch that
         * targets this label is not visible here -- confirm.
         */

        addi  sp, sp, 60


        /*
         * Switch output to the .exceptions.exit.label section and define the
         * label that .Lstore_result branches to.  There is no branch after
         * the addi above.
         * NOTE(review): reaching .Lexception_exit from .Lnot_muldiv therefore
         * looks like it depends on how the linker orders these sections --
         * confirm against the linker script.
         */
        .section .exceptions.exit.label
.Lexception_exit:
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -