⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bnippc.c

📁 著名的加密软件的应用于电子邮件中
💻 C
字号:
/*
* bnippc.c - Assembly primitives for the bignum library, PowerPC version.
*
* Copyright (C) 1995-1997 Pretty Good Privacy, Inc. All rights reserved.
*
* $Id: bnippc.c,v 1.5.2.1 1997/06/07 09:49:37 mhw Exp $
*
* Register usage during function calls is:
* r0 - volatile
* r1 - stack pointer, preserved
* r2 - TOC pointer, preserved
* r3 - First argument and return value register
* r4-r10 - More argument registers, volatile
* r11-r12 - Volatile
* r13-r31 - Preserved
* LR, CTR, XER and MQ are all volatile.
* LR holds return address on entry.
*
* On the PPC 601, unrolling the loops more doesn't seem to speed things
* up at all. I'd be curious if other chips differed.
	*/
#include "bnippc.h"

#if __MWERKS__ < 0x800

#include "bnppcasm.h"	/* PowerPC assembler */

/*
 * mulN1: multiply the len-word number at in by the single word k and
 * store the (len+1)-word product at out.
 *
 * Expects (*out, *in, len, k), len >= 1
 *           r3    r4   r5  r6
 *
 * The routine is kept as an array of instruction words built with the
 * PPC_* macros from bnppcasm.h.  The PPC_BC displacements below are
 * counted in array entries relative to the PPC_BC entry itself, so adding
 * or removing an entry between a branch and its target means adjusting
 * them.
 */
static const unsigned mulN1[] = {
			PPC_LWZ(7,4,0), 		/* r7 = in[0] */
			PPC_MULLW(8,7,6),		/* r8 = low half of r7 * k */
			PPC_MTCTR(5),			/* CTR = len; r5 is free for reuse after this */
			PPC_ADDIC(0,0,0),		/* r0 += 0, done only to clear the carry bit for the loop */
			PPC_MULHWU(5,7,6),		/* r5 = high half of r7 * k (the carry word) */
			PPC_STW(8,3,0),			/* out[0] = r8 */
			PPC_BC(18,31,7),		/* Branch to Label if --ctr == 0 (len was 1) */
/* Loop: */
			PPC_LWZU(7,4,4),		/* r7 = *++in */
			PPC_MULLW(8,7,6),		/* r8 = low word of product */
			PPC_ADDE(8,8,5),		/* r8 += carry word r5 + carry bit CF */
			PPC_STWU(8,3,4),		/* *++out = r8 */
			PPC_MULHWU(5,7,6),		/* r5 = high word of product, the next carry word */
			PPC_BC(16,31,-5),		/* Branch to Loop if --ctr != 0 */
/* Label: */
			PPC_ADDZE(5,5),			/* Fold the last carry bit into the carry word r5 */
			PPC_STW(5,3,4),			/* Store r5 in the word after the last one written */
			PPC_BLR()
	};

/*
 * mulAdd1: add the product of the len-word number at in and the single
 * word k to the len words at out, in place.  The carry word left over
 * after the last word is returned in r3.
 *
 * Expects (*out, *in, len, k), len >= 1
 *           r3    r4   r5  r6
 *
 * The PPC_BC displacements are counted in array entries relative to the
 * PPC_BC entry itself; keep them in step with any change to the sequence.
 */
static unsigned const mulAdd1[] = {
	PPC_LWZ(7,4,0), 	/* r7 = in[0] */
			PPC_LWZ(0,3,0),			/* r0 = out[0] */
			PPC_MULLW(8,7,6),		/* r8 = low half of r7 * k */
			PPC_MTCTR(5),			/* CTR = len; r5 is free for reuse after this */
			PPC_MULHWU(5,7,6),		/* r5 = high half of r7 * k (the carry word) */
			PPC_ADDC(8,8,0),		/* r8 = r8 + r0, setting CF */
			PPC_STW(8,3,0),			/* out[0] = r8 */
			PPC_BC(18,31,10),		/* Branch to Label if --ctr == 0 (len was 1) */
/* Loop: */
			PPC_LWZU(7,4,4),		/* r7 = *++in */
			PPC_LWZU(0,3,4),		/* r0 = *++out */
			PPC_MULLW(8,7,6),		/* r8 = low word of product */
			PPC_ADDE(8,8,5), 		/* r8 += carry word r5 + carry bit CF from the previous add */
			PPC_MULHWU(5,7,6),		/* r5 = high word of product, the next carry word */
			PPC_ADDZE(5,5),			/* Add carry bit from the ADDE above to r5 */
			PPC_ADDC(8,8,0),		/* r8 = r8 + r0, setting CF for the next ADDE/ADDZE */
			PPC_STW(8,3,0), 		/* *out = r8 */
			PPC_BC(16,31,-8),		/* Branch to Loop if --ctr != 0 */
/* Label: */
		PPC_ADDZE(3,5),				/* r3 = r5 + CF: the final carry word, returned */
		PPC_BLR()
};

/*
 * mulSub1: subtract the product of the len-word number at in and the
 * single word k from the len words at out, in place.  The carry (borrow)
 * word left over after the last word is returned in r3.
 *
 * Expects (*out, *in, len, k), len >= 1
 *           r3    r4   r5  r6
 *
 * Multiply and subtract is rather a pain.  If subtracting the low word
 * of the product from out[i] generates a borrow, we want to increment
 * the carry word (initially in the range 0..0xfffffffe).  However, the
 * PPC's carry bit CF is *clear* after a subtract that borrows, so we
 * want to add (1-CF) to the carry word.  This is done using two
 * instructions:
 *
 * SUBFME, subtract from minus one extended.  This computes
 * rD = ~rS + 0xffffffff + CF.  Since rS is from 0 to 0xfffffffe,
 * ~rS is from 1 through 0xffffffff, and the 32-bit sum with
 * 0xffffffff+CF is from 0 through 0xffffffff, setting the carry flag
 * unconditionally; and NOR, which is used as a bitwise NOT instruction.
 *
 * The SUBFME performs the computation rD = ~rS + 0xffffffff + CF,
 * = (-rS - 1) + (CF - 1) = -(rS - CF + 1) - 1 = ~(rS + 1-CF),
 * which is the bitwise complement of the value we want.
 * We want to add the complement of that result to the low word of the
 * product, which is just what a subtract would do, if only we could get
 * the carry flag clear.  But it's always set, except for SUBFE, and the
 * operation we just performed unconditionally *sets* the carry flag.  Ugh.
 * So find the complement in a separate instruction.
 *
 * The PPC_BC displacements are counted in array entries relative to the
 * PPC_BC entry itself; keep them in step with any change to the sequence.
 */
static unsigned const mulSub1[] = {
			PPC_LWZ(7,4,0), 		/* r7 = in[0] */
			PPC_LWZ(0,3,0),			/* r0 = out[0] */
			PPC_MTCTR(5),			/* CTR = len; r5 is free for reuse after this */
			PPC_MULLW(8,7,6),		/* r8 = low half of r7 * k */
			PPC_MULHWU(5,7,6),		/* r5 = high half of r7 * k (the carry word) */
			PPC_SUBFC(8,8,0),		/* r8 = r0 - r8, setting CF */
			PPC_STW(8,3,0),			/* out[0] = r8 */
			PPC_SUBFME(5,5),		/* First of two insns to add (1-CF) to r5 */
			PPC_BC(18,31,12),		/* Branch to Label if --ctr == 0 (len was 1) */
/* Loop: */
			PPC_LWZU(7,4,4),		/* r7 = *++in */
			PPC_LWZU(0,3,4),		/* r0 = *++out */
			PPC_NOR(5,5,5),			/* Second of two insns to add (1-CF) to r5 */
			PPC_MULLW(8,7,6),		/* r8 = low word of product */
			PPC_ADDC(8,8,5), 		/* r8 += carry word r5, setting CF */
			PPC_MULHWU(5,7,6),		/* r5 = high word of product, the next carry word */
			PPC_ADDZE(5,5),			/* Add carry bit from the ADDC above to r5 */
			PPC_SUBFC(8,8,0),		/* r8 = r0 - r8, setting CF */
			PPC_STW(8,3,0), 		/* *out = r8 */
			PPC_SUBFME(5,5),		/* First of two insns to add (1-CF) to r5 */
			PPC_BC(16,31,-10),		/* Branch to Loop if --ctr != 0 */
/* Label: */
			PPC_NOR(3,5,5),			/* Finish adding (1-CF) to r5; result in r3 is returned */
			PPC_BLR()
	};

#if 0
/*
 * NOTE(review): this is an unfinished sketch and is excluded from the
 * build by the enclosing #if 0.  It cannot compile as written: several
 * entries are incomplete (a bare "PPC_", an unclosed "PPC_LWZU(") and
 * others use placeholder operands (count, x, xx, yy).  The loop also
 * reads through r23, which the register list below does not mention.
 *
 * Args: BNWORD32 *n, BNWORD32 const *mod, unsigned mlen, BNWORD32 inv)
 * r3 r4 r5 r6
 * r7, r8 and r9 are the triple-width accumulator.
 * r0 and r10 are temporary registers.
 * r11 and r12 are temporary pointers into n and mod, respectively.
 * r2 (!) is another temporary register.
 */
static unsigned const montReduce[] = {
			PPC_MTCTR(5),	/* ??? */
			PPC_LWZ(7,3,0),			/* Load low word of n into r7 */
			PPC_LWZ(10,4,0),		/* Fetch low word of mod */
			PPC_MULLW(0,7,6),		/* Invert r7 into r0 */
			PPC_STW(0,3,0),			/* Store back for future use */
			PPC_MULHWU(8,10,7),		/* Get high word of whatnot */
			PPC_MULLW(10,10,7),		/* Get low word of it */
			PPC_ADDC(7,7,10),		/* Add low word of product to r7 */
			PPC_ADDZE(8,8),			/* Add carry to high word */
			PPC_
	

			PPC_MULHW(8,7,6),
			PPC_ADDC(7,7,0),		/* Add inverse back to r7 */
			PPC_ADDZE(8,8),
			PPC_
		
			PPC_LWZU(
/* Loop: */
			PPC_LWZU(0,11,4),
			PPC_LWZU(10,23,-4),
			PPC_MULLW(2,0,10),
			PPC_ADDC(7,7,2),
			PPC_MULHWU(0,0,10),
			PPC_ADDE(8,8,0),
			PPC_ADDZE(9,9),
			PPC_BC(16,31,-7),		/* Branch to Loop if --ctr != 0 */

			PPC_ADDIC_(count,-1),
			PPC_LWZU(0,x,4),
			PPC_ADDC(0,7,0),
PPC_STW(0,x,0),
PPC_ADDZE(7,8),
PPC_ADDZE(8,9),
PPC_LI(9,0),
PPC_BC(xx,2,yy),

};
#endif

/*
 * Transition vectors for the three routines above, overlapped to save
 * space.
 *
 * A PowerPC transition vector for a (potentially) inter-module jump or
 * call is a pair of words: the instruction address, then the Table Of
 * Contents (TOC) pointer that gets loaded into r2.  None of the routines
 * here use global variables, so they never look at the TOC pointer and
 * its value is unimportant.
 *
 * Because of that, the word following each address is simply the next
 * entry in the table: vector i starts at &bniPPC_tv[i], and the trailing
 * 0 supplies the second word of the last vector.
 */
const unsigned *const bniPPC_tv[] = { mulN1, mulAdd1, mulSub1, 0 };

#else /* __MWERKS >= 0x800 */

/*
 * bniMulN1_32: multiply the len-word number at in by the single word k
 * and store the (len+1)-word product at out.
 *
 * Expects (*out, *in, len, k), len >= 1
 *           r3    r4   r5  r6
 *
 * len is copied to CTR first, after which its register is reused to hold
 * the carry word (the high half of each product).
 */
asm void
bniMulN1_32(register unsigned *out, register unsigned const *in,
			register unsigned len, register unsigned k)
	{
			lwz 	 r7,0(in) 		/* r7 = in[0] */
			mtctr	len				/* CTR = len; len is reused as carry word */
			mullw	r8,r7,k			/* r8 = low half of r7 * k */
			addic	r0,r0,0			/* r0 += 0, only to clear carry bit for loop */
			mulhwu	len,r7,k		/* len = high half of r7 * k (carry word) */
			stw 	 r8,0(out)		/* out[0] = r8 */
			bdz-	 label			/* Branch to label if --ctr == 0 (len was 1) */
	loop:
			lwzu	 r7,4(in)		/* r7 = *++in */
			mullw	r8,r7,k			/* r8 = low word of product */
			adde	 r8,r8,len		/* r8 += carry word len + carry bit CF */
			stwu	 r8,4(out)		/* *++out = r8 */
			mulhwu	len,r7,k		/* len = high word of product, next carry */
			bdnz+	loop			/* Branch to loop if --ctr != 0 */
	label:
			addze	len,len			/* Fold last carry bit into the carry word */
			stw 	 len,4(out)		/* Store it after the last word written */
			blr
	}

/*
* MulAdd1 expects (*out, *in, len, k), len >= 1
* r3 r4 r5 r6
*/
/*
 * bniMulAdd1_32: add the product of the len-word number at in and the
 * single word k to the len words at out, in place.  Returns the carry
 * word left over after the last word.  len is copied to CTR first and
 * its register (r5) is then reused to hold the carry word.
 */
asm unsigned
bniMulAdd1_32(register unsigned *out, register unsigned const *in,
			register unsigned len, register unsigned k)
	{
			lwz 	 r7,0(in) 		/* r7 = in[0] */
			lwz 	 r0,0(out)		/* r0 = out[0] */
			mullw	r8,r7,k 		/* r8 = low half of r7 * k */
			mtctr	len 			/* CTR = len; len is reused as carry word */
			mulhwu	len,r7,k		/* len = high half of r7 * k (carry word) */
			addc	 r8,r8,r0		/* r8 = r8 + r0, setting CF */
			stw 	 r8,0(out)		/* out[0] = r8 */
			bdz-	 label			/* Branch to label if --ctr == 0 (len was 1) */
	loop:
			lwzu	 r7,4(in)		/* r7 = *++in */
			lwzu	 r0,4(out)		/* r0 = *++out */
			mullw	r8,r7,k			/* r8 = low word of product */
			adde	 r8,r8,len		/* r8 += carry word len + carry bit CF */
			mulhwu	len,r7,k		/* len = high word of product, next carry */
			addze	len,len			/* Add carry bit from the adde above to len */
			addc	 r8,r8,r0		/* r8 = r8 + r0, setting CF for next adde/addze */
			stw 	 r8,0(out)		/* *out = r8 */
			bdnz+	loop			/* Branch to loop if --ctr != 0 */
	label:
			addze	r3,r5	 		/* r3 = r5 (i.e. len, the carry word) + CF; returned */
			blr
	}

	/*
* MulSub1 expects (*out, *in, len, k), len >= 1
* r3 r4 r5 r6
*
* Multiply and subtract is rather a pain. If the subtract of the
* low word of the product from out[i] generates a borrow, we want to
* increment the carry word (initially in the range 0..0xfffffffe).
* However, the PPC's carry bit CF is *clear* after a subtract, so
* we want to add (1-CF) to the carry word. This is done using two
* instructions:
*
* SUBFME, subtract from minus one extended. This computes
* rD = ~rS + 0xffffffff + CF. Since rS is from 0 to 0xfffffffe,
* ~rS is from 1 through 0xffffffff, and the 32-bit sum with 0xffffffff+CF is
* from 0 through 0xffffffff, setting the carry flag unconditionally, and
* NOR, which is used as a bitwise invert NOT instruction.
*
* The SUBFME performs the computation rD = ~rS + 0xffffffff + CF,
* = (-rS - 1) + (CF - 1) = -(rS - CF + 1) - 1 = ~(rS + 1-CF),
* which is the bitwise complement of the value we want.
* We want to add the complement of that result to the low word of the
* product, which is just what a subtract would do, if only we could get
* the carry flag clear. But it's always set, except for SUBFE, and the
* operation we just performed unconditionally *sets* the carry flag. Ugh.
* So find the complement in a separate instruction.
*/
/*
 * bniMulSub1_32: subtract the product of the len-word number at in and
 * the single word k from the len words at out, in place.  Returns the
 * carry (borrow) word left over after the last word.  len is copied to
 * CTR first and its register (r5) is then reused to hold the carry word.
 */
asm unsigned
bniMulSub1_32(register unsigned *out, register unsigned const *in,
	register unsigned len, register unsigned k)
{
			lwz 	 r7,0(in) 		/* r7 = in[0] */
			lwz 	 r0,0(out)		/* r0 = out[0] */
			mtctr	len 			/* CTR = len; len is reused as carry word */
			mullw	r8,r7,k 		/* r8 = low half of r7 * k */
			mulhwu	len,r7,k		/* len = high half of r7 * k (carry word) */
			subfc	r8,r8,r0		/* r8 = r0 - r8, setting CF */
			stw 	 r8,0(out)		/* out[0] = r8 */
			subfme	len,len			/* First of two insns to add (1-CF) to len */
			bdz-	 label			/* Branch to label if --ctr == 0 (len was 1) */
	loop:
			lwzu	 r7,4(in)		/* r7 = *++in */
			lwzu	 r0,4(out)		/* r0 = *++out */
			nor 	 len,len,len		/* Second of two insns to add (1-CF) to len */
			mullw	r8,r7,k			/* r8 = low word of product */
			addc	 r8,r8,len		/* r8 += carry word len, setting CF */
			mulhwu	len,r7,k		/* len = high word of product, next carry */
			addze	len,len			/* Add carry bit from the addc above to len */
			subfc	r8,r8,r0		/* r8 = r0 - r8, setting CF */
			stw 	 r8,0(out)		/* *out = r8 */
			subfme	len,len			/* First of two insns to add (1-CF) to len */
			bdnz+	loop			/* Branch to loop if --ctr != 0 */
	label:
			nor 	 r3,r5,r5		/* Finish adding (1-CF) to len (r5); r3 is returned */
			blr
	}

#endif /* __MWERKS >= 0x800 */
/* 45678901234567890123456789012345678901234567890123456789012345678901234567*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -