📄 alt_exception_muldiv.s
字号:
bge r3, zero, 0f
sub r3, zero, r3 /* -r3 */
0:
bge r6, zero, 0f
sub r6, zero, r6 /* -r6 */
0:
.Lunsigned_division:
/* Initialize the unsigned-division loop. */
movi r13, 0 /* remainder = 0 */
/* Now
* r3 = dividend : quotient
* r4 = 0x25 for div, 0x24 for divu
* r6 = divisor
* r13 = remainder
* r14 = loop counter (already initialized to 32)
* r17 = MSB contains sign of quotient
*/
/*
* for (count = 32; count > 0; --count)
* {
*/
.Ldivide_loop:
/*
* Division:
*
* (remainder:dividend:quotient) <<= 1;
*/
slli r13, r13, 1
cmplt r15, r3, zero /* r15 = MSB of r3 */
or r13, r13, r15
slli r3, r3, 1
/*
* if (remainder >= divisor)
* {
* set LSB of quotient
* remainder -= divisor;
* }
*/
bltu r13, r6, .Ldiv_skip
ori r3, r3, 1
sub r13, r13, r6
.Ldiv_skip:
/*
* }
*/
subi r14, r14, 1
bne r14, zero, .Ldivide_loop
mov r9, r3
/* Now
* r9 = quotient
* r4 = 0x25 for div, 0x24 for divu
* r7 = 4*(C^16)
* r17 = MSB contains sign of quotient
*/
/*
* Conditionally negate signed quotient. If quotient is unsigned,
* the sign already is initialized to 0.
*/
bge r17, zero, .Lstore_result
sub r9, zero, r9 /* -r9 */
br .Lstore_result
/* MULTIPLICATION
*
* A "product" is the number that one gets by summing a "multiplicand"
* several times. The "multiplier" specifies the number of copies of the
* multiplicand that are summed.
*
* Actual multiplication algorithms don't use repeated addition, however.
* Shift-and-add algorithms get the same answer as repeated addition, and
* they are faster. To compute the lower half of a product (pppp below)
* one shifts the product left before adding in each of the partial products
* (a * mmmm) through (d * mmmm).
*
* To compute the upper half of a product (PPPP below), one adds in the
* partial products (d * mmmm) through (a * mmmm), each time following the
* add by a right shift of the product.
*
* mmmm
* * abcd
* ------
* #### = d * mmmm
* #### = c * mmmm
* #### = b * mmmm
* #### = a * mmmm
* --------
* PPPPpppp
*
* The example above shows 4 partial products. Computing actual Nios II
* products requires 32 partials.
*
* It is possible to compute the result of mulxsu from the result of mulxuu
* because the only difference between the results of these two opcodes is
* the value of the partial product associated with the sign bit of rA.
*
* mulxsu = mulxuu - ((rA < 0) ? rB : 0);
*
* It is possible to compute the result of mulxss from the result of mulxsu
* because the only difference between the results of these two opcodes is
* the value of the partial product associated with the sign bit of rB.
*
* mulxss = mulxsu - ((rB < 0) ? rA : 0);
*
*/
.Lmul_immed:
/* Opcode is muli. Change it into mul for remainder of algorithm. */
mov r7, r6 /* Field B is dest register, not field C. */
mov r6, r4 /* Field IMM16 is src2, not field B. */
movi r4, 0x27 /* OPX of mul is 0x27 */
.Lmultiply:
/* Initialize the multiplication loop. */
movi r9, 0 /* mul_product = 0 */
movi r10, 0 /* mulxuu_product = 0 */
mov r11, r6 /* save original multiplier for mulxsu and mulxss */
mov r12, r6 /* mulxuu_multiplier (will be shifted) */
movi r16, 1 /* used to create "rori B,A,1" from "ror B,A,r16" */
/* Now
* r3 = multiplicand
* r6 = mul_multiplier
* r7 = 4 * dest_register (used later as offset to sp)
* r9 = mul_product
* r10 = mulxuu_product
* r11 = original multiplier
* r12 = mulxuu_multiplier
* r14 = loop counter (already initialized)
* r15 = temp
* r16 = 1
*/
/*
* for (count = 32; count > 0; --count)
* {
*/
.Lmultiply_loop:
/*
* mul_product <<= 1;
* lsb = multiplier & 1;
*/
slli r9, r9, 1
andi r15, r12, 1
/*
* if (lsb == 1)
* {
* mulxuu_product += multiplicand;
* }
*/
beq r15, zero, .Lmulx_skip
add r10, r10, r3
cmpltu r15, r10, r3 /* Save the carry from the MSB of mulxuu_product. */
ror r15, r15, r16 /* r15 = 0x80000000 on carry, or else 0x00000000 */
.Lmulx_skip:
/*
* if (MSB of mul_multiplier == 1)
* {
* mul_product += multiplicand;
* }
*/
bge r6, zero, .Lmul_skip
add r9, r9, r3
.Lmul_skip:
/*
* mulxuu_product >>= 1; logical shift
* mul_multiplier <<= 1; done with MSB
* mulx_multiplier >>= 1; done with LSB
*/
srli r10, r10, 1
or r10, r10, r15 /* OR in the saved carry bit. */
slli r6, r6, 1
srli r12, r12, 1
/*
* }
*/
subi r14, r14, 1
bne r14, zero, .Lmultiply_loop
/*
* Multiply emulation loop done.
*/
/* Now
* r3 = multiplicand
* r4 = OPX
* r7 = 4 * dest_register (used later as offset to sp)
* r9 = mul_product
* r10 = mulxuu_product
* r11 = original multiplier
* r15 = temp
*/
/*
* Select/compute the result based on OPX.
*/
/* OPX == mul? Then store. */
xori r15, r4, 0x27
beq r15, zero, .Lstore_result
/* It's one of the mulx.. opcodes. Move over the result. */
mov r9, r10
/* OPX == mulxuu? Then store. */
xori r15, r4, 0x07
beq r15, zero, .Lstore_result
/* Compute mulxsu
*
* mulxsu = mulxuu - ((rA < 0) ? rB : 0);
*/
bge r3, zero, .Lmulxsu_skip
sub r9, r9, r11
.Lmulxsu_skip:
/* OPX == mulxsu? Then store. */
xori r15, r4, 0x17
beq r15, zero, .Lstore_result
/* Compute mulxss
*
* mulxss = mulxsu - ((rB < 0) ? rA : 0);
*/
bge r11, zero, .Lmulxss_skip
sub r9, r9, r3
.Lmulxss_skip:
/* At this point, assume that OPX is mulxss, so store */
.Lstore_result:
add r7, r7, sp
stw r9, 0(r7)
ldw r16, 0(sp)
ldw r17, 4(sp)
ldw r18, 8(sp)
ldw r19, 12(sp)
ldw r20, 16(sp)
ldw r21, 20(sp)
ldw r22, 24(sp)
ldw r23, 28(sp)
/* bt @ 32 - Breakpoint register usually isn't an operand. */
/* et @ 36 - Don't corrupt et. */
/* gp @ 40 - Don't corrupt gp. */
/* sp @ 44 - Don't corrupt sp. */
ldw fp, 48(sp)
/* ea @ 52 - Don't corrupt ea. */
/* ba @ 56 - Breakpoint register usually isn't an operand. */
addi sp, sp, 60
br .Lexception_exit
.Lnot_muldiv:
addi sp, sp, 60
.section .exceptions.exit.label
.Lexception_exit:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -