/* bnippc.s */
/*
* bnippc.s - Assembly primitives for the bignum library, PowerPC version.
*
* $Id: bnippc.s,v 1.1 2001/06/29 01:36:30 dallen Exp $
*
* Register usage during function calls is:
* r0 - volatile
* r1 - stack pointer, preserved
* r2 - TOC pointer, preserved
* r3 - First argument and return value register
* r4-r10 - More argument registers, volatile
* r11-r12 - Volatile
* r13-r31 - Preserved
* LR, CTR, XER and MQ are all volatile.
* LR holds return address on entry.
*
* On the PPC 601, unrolling the loops more doesn't seem to speed things
* up at all. I'd be curious if other chips differed.
*/
/*
 * MulN1 expects (*out, *in, len, k), len >= 1
* r3 r4 r5 r6
*/
/*
* asm void
* bniMulN1_32(register unsigned *out, register unsigned const *in,
* register unsigned len, register unsigned k);
*/
#define SP r1
#define RTOC r2
#define out r3
#define in r4
#define len r5
#define k r6
/*
 * void bniMulN1_32(unsigned *out, unsigned const *in,
 *                  unsigned len, unsigned k)
 *
 * out[0..len] = in[0..len-1] * k.  Writes len+1 words: the final
 * high (carry) word of the product is stored at out[len].
 *
 * In:      r3 = out, r4 = in, r5 = len (>= 1), r6 = k
 * Clobbers r0, r7, r8, CTR.  len (r5) is reused as the running
 * carry word once the loop count has been moved into CTR.
 *
 * Fix vs. previous revision: the first-word mulhwu was emitted
 * twice (before and after the stw); the redundant copy is removed.
 */
	.globl _bniMulN1_32
_bniMulN1_32:
	lwz	r7,0(in)		/* r7 = in[0] */
	mtctr	len			/* CTR = len (loop counter) */
	mullw	r8,r7,k			/* r8 = low word of in[0]*k */
	addic	r0,r0,0			/* Clear CA so the loop's first adde is exact */
	stw	r8,0(out)		/* out[0] = r8 */
	mulhwu	len,r7,k		/* len = high word of in[0]*k, the carry word */
	bdz-	bniMulN1_32_label	/* Skip loop if --ctr == 0 */
bniMulN1_32_loop:
	lwzu	r7,4(in)		/* r7 = *++in */
	mullw	r8,r7,k			/* r8 = low word of product */
	adde	r8,r8,len		/* r8 += carry word len + carry bit CA */
	stwu	r8,4(out)		/* *++out = r8 */
	mulhwu	len,r7,k		/* len = high word of product, next carry */
	bdnz+	bniMulN1_32_loop	/* Loop while --ctr != 0 */
bniMulN1_32_label:
	addze	len,len			/* Fold final CA into the carry word */
	stw	len,4(out)		/* Store carry word at out[len] */
	blr
#undef SP
#undef RTOC
#undef out
#undef in
#undef len
#undef k
/*
 * MulAdd1 expects (*out, *in, len, k), len >= 1
* r3 r4 r5 r6
*/
/*
* asm unsigned
* bniMulAdd1_32(register unsigned *out, register unsigned const *in,
* register unsigned len, register unsigned k)
*/
#define SP r1
#define RTOC r2
#define out r3
#define in r4
#define len r5
#define k r6
/*
 * unsigned bniMulAdd1_32(unsigned *out, unsigned const *in,
 *                        unsigned len, unsigned k)
 *
 * out[0..len-1] += in[0..len-1] * k; returns the final carry word.
 *
 * In:      r3 = out, r4 = in, r5 = len (>= 1), r6 = k
 * Out:     r3 = carry word left over after the last addition
 * Clobbers r0, r7, r8, CTR.  len (r5) is reused as the running
 * carry word once the loop count has been moved into CTR.
 *
 * Loop invariant on entry to each iteration: len holds the high
 * word of the previous product (plus any overflow from the adde),
 * and CA holds the carry out of the previous addc with out[i].
 * Note the high word of a 32x32 product is at most 0xfffffffe, so
 * the addze below can never itself overflow.
 */
	.globl _bniMulAdd1_32
_bniMulAdd1_32:
	lwz	r7,0(in)		/* r7 = in[0] */
	lwz	r0,0(out)		/* r0 = out[0] */
	mullw	r8,r7,k			/* r8 = low word of in[0]*k */
	mtctr	len			/* CTR = len (loop counter) */
	mulhwu	len,r7,k		/* len = high word of in[0]*k */
	addc	r8,r8,r0		/* r8 += out[0], setting CA */
	stw	r8,0(out)		/* out[0] = r8 */
	bdz-	bniMulAdd1_32_label	/* Skip loop if --ctr == 0 */
bniMulAdd1_32_loop:
	lwzu	r7,4(in)		/* r7 = *++in */
	lwzu	r0,4(out)		/* r0 = *++out */
	mullw	r8,r7,k			/* r8 = low word of product */
	adde	r8,r8,len		/* r8 += carry word len + carry bit CA */
	mulhwu	len,r7,k		/* len = high word of product (CA untouched) */
	addze	len,len			/* Fold carry from the adde into len */
	addc	r8,r8,r0		/* r8 += *out, setting CA for next adde */
	stw	r8,0(out)		/* *out = r8 */
	bdnz+	bniMulAdd1_32_loop	/* Loop while --ctr != 0 */
bniMulAdd1_32_label:
	addze	r3,r5			/* Return value: carry word + final CA */
	blr
#undef SP
#undef RTOC
#undef out
#undef in
#undef len
#undef k
/*
 * MulSub1 expects (*out, *in, len, k), len >= 1
* r3 r4 r5 r6
*
* Multiply and subtract is rather a pain. If the subtract of the
* low word of the product from out[i] generates a borrow, we want to
* increment the carry word (initially in the range 0..0xfffffffe).
* However, the PPC's carry bit CF is *clear* after a subtract, so
* we want to add (1-CF) to the carry word. This is done using two
* instructions:
*
* SUBFME, subtract from minus one extended. This computes
 * rD = ~rS + 0xffffffff + CF. Since rS is from 0 to 0xfffffffe,
 * ~rS is from 1 through 0xffffffff, and the sum with 0xffffffff+CF is
 * from 0x100000000 through 0x1ffffffff, setting the carry flag
 * unconditionally, and
* NOR, which is used as a bitwise invert NOT instruction.
*
* The SUBFME performs the computation rD = ~rS + 0xffffffff + CF,
* = (-rS - 1) + (CF - 1) = -(rS - CF + 1) - 1 = ~(rS + 1-CF),
* which is the bitwise complement of the value we want.
* We want to add the complement of that result to the low word of the
* product, which is just what a subtract would do, if only we could get
* the carry flag clear. But it's always set, except for SUBFE, and the
* operation we just performed unconditionally *sets* the carry flag. Ugh.
* So find the complement in a separate instruction.
*/
/*
* asm unsigned
* bniMulSub1_32(register unsigned *out, register unsigned const *in,
* register unsigned len, register unsigned k)
*/
#define SP r1
#define RTOC r2
#define out r3
#define in r4
#define len r5
#define k r6
/*
 * unsigned bniMulSub1_32(unsigned *out, unsigned const *in,
 *                        unsigned len, unsigned k)
 *
 * out[0..len-1] -= in[0..len-1] * k; returns the final borrow word.
 *
 * In:      r3 = out, r4 = in, r5 = len (>= 1), r6 = k
 * Out:     r3 = borrow word left over after the last subtraction
 * Clobbers r0, r7, r8, CTR.  len (r5) is reused as the running
 * carry/borrow word once the loop count has been moved into CTR.
 *
 * The subfme/nor pair implements len += (1 - CA): subfc leaves
 * CA *clear* on borrow, and the carry word must be incremented
 * exactly when a borrow occurred.  subfme computes ~(len + 1 - CA)
 * (always setting CA as a side effect), and the nor at the top of
 * the loop / at the exit complements it back to len + 1 - CA.
 * See the block comment above for the full derivation.
 */
	.globl _bniMulSub1_32
_bniMulSub1_32:
	lwz	r7,0(in)		/* r7 = in[0] */
	lwz	r0,0(out)		/* r0 = out[0] */
	mtctr	len			/* CTR = len (loop counter) */
	mullw	r8,r7,k			/* r8 = low word of in[0]*k */
	mulhwu	len,r7,k		/* len = high word of in[0]*k */
	subfc	r8,r8,r0		/* r8 = out[0] - r8, CA clear on borrow */
	stw	r8,0(out)		/* out[0] = r8 */
	subfme	len,len			/* len = ~(len + 1-CA): first half of fixup */
	bdz-	bniMulSub1_32_label	/* Skip loop if --ctr == 0 */
bniMulSub1_32_loop:
	lwzu	r7,4(in)		/* r7 = *++in */
	lwzu	r0,4(out)		/* r0 = *++out */
	nor	len,len,len		/* len = len + 1-CA: second half of fixup */
	mullw	r8,r7,k			/* r8 = low word of product */
	addc	r8,r8,len		/* r8 += carry word len, setting CA */
	mulhwu	len,r7,k		/* len = high word of product (CA untouched) */
	addze	len,len			/* Fold carry from the addc into len */
	subfc	r8,r8,r0		/* r8 = *out - r8, CA clear on borrow */
	stw	r8,0(out)		/* *out = r8 */
	subfme	len,len			/* len = ~(len + 1-CA): first half of fixup */
	bdnz+	bniMulSub1_32_loop	/* Loop while --ctr != 0 */
bniMulSub1_32_label:
	nor	r3,r5,r5		/* Return value: complement back to borrow word */
	blr
#undef SP
#undef RTOC
#undef out
#undef in
#undef len
#undef k
/* 45678901234567890123456789012345678901234567890123456789012345678901234567*/