📄 bni80386.s

📁 著名的加密软件的应用于电子邮件中
💻 S
字号:

### Assembly primitives for bignum library, 80386 family, 32-bit code.
###
### Copyright (C) 1995-1997 Pretty Good Privacy, Inc. All rights reserved.
###
### $Id: bni80386.s,v 1.4.2.1 1997/06/07 09:49:33 mhw Exp $
###
### Several primitives are included here. Only bniMulAdd1 is *really*
### critical, but once that's written, lnmMulN1 and bniMulSub1 are quite
### easy to write as well, so they are included here as well.
### bniDiv21 and bniModQ are so easy to write that they're included, too.
###
### All functions here are for 32-bit flat mode. I.e. near code and
### near data, although the near offsets are 32 bits.
### Preserved registers are esp, ebp, esi, edi and ebx. That last
### is needed by ELF for PIC, and differs from the IBM PC calling
### convention.

# Different assemblers have different conventions here
align4=4	# could be 2 or 4
align8=8	# could be 3 or 8
align16=16	# cound be 4 or 16


.text

# We declare each symbol with two names, to deal with ELF/a.out variances.
			.globl	bniMulN1_32
			.globl	_bniMulN1_32
			.globl	bniMulAdd1_32
			.globl	_bniMulAdd1_32
			.globl	bniMulSub1_32
			.globl	_bniMulSub1_32
			.globl	bniDiv21_32
			.globl	_bniDiv21_32
			.globl	bniModQ_32
			.globl	_bniModQ_32

## Register usage:
## %eax - low half of product
## %ebx - carry to next iteration
## %ecx - multiplier (k)
## %edx - high half of product
## %esi - source pointer
## %edi - dest pointer
## %ebp - loop counter
##
## Stack frame:
## +--------+ %esp+20 %esp+24 %esp+28 %esp+32 %esp+36
## | k |
## +--------+ %esp+16 %esp+20 %esp+24 %esp+28 %esp+32
## | len |
## +--------+ %esp+12 %esp+16 %esp+20 %esp+24 %esp+28
## | in |
## +--------+ %esp+8 %esp+12 %esp+16 %esp+20 %esp+24
## | out |
## +--------+ %esp+4 %esp+8 %esp+12 %esp+16 %esp+20
## | return |
## +--------+ %esp	%esp+4 %esp+8 %esp+12 %esp+16
## | %esi |
## +--------+	%esp	%esp+4 %esp+8 %esp+12
## | %ebp |
## +--------+	%esp	%esp+4 %esp+8
## | %ebx |
## +--------+	%esp	%esp+4
## | %edi  |
## +--------+	%esp

	.align	align16
bniMulN1_32:
_bniMulN1_32:
			pushl	%esi			# U
			movl	 12(%esp),%esi	# V	 load in
			pushl	%ebp		# U
			movl	 20(%esp),%ebp	# V	 load len
			pushl	%ebx		# U
			movl	 28(%esp),%ecx	# V	 load k
			pushl	%edi		# U
			movl	 20(%esp),%edi	# V	 load out

## First multiply step has no carry in.
			movl	 (%esi),%eax			# V
			leal	 -4(,%ebp,4),%ebx		# U	loop unrolling
			mull	 %ecx				# NP	 first multiply
			movl	 %eax,(%edi)			# U
			andl	 $12,%ebx			 # V	loop unrolling

			addl	 %ebx,%esi			# U	loop unrolling
			addl	 %ebx,%edi			# V	loop unrolling

			jmp	*m32_jumptable(%ebx)	# NP	loop unrolling

			.align	align4
m32_jumptable:
			.long	m32_case0
			.long	m32_case1
			.long	m32_case2
			.long	m32_case3

			nop
			.align	align8
			nop
			nop
			nop	# Get loop nicely aligned

m32_case0:
	subl	$4,%ebp		# U
	jbe	m32_done	 # V

m32_loop:
			movl	 4(%esi),%eax	# U
			movl	 %edx,%ebx		# V	 Remember carry for later
			addl	 $16,%esi		# U
			addl	 $16,%edi		# V
			mull	 %ecx			# NP
			addl	 %ebx,%eax		# U	 Add carry in from previous word
		adcl	 $0,%edx		# U
		movl	 %eax,-12(%edi)	 # V
m32_case3:
			movl	 -8(%esi),%eax	# U
			movl	 %edx,%ebx	# V	 Remember carry for later
			mull	 %ecx		 # NP
			addl	 %ebx,%eax	# U		Add carry in from previous word
			adcl	 $0,%edx		# U
			movl	 %eax,-8(%edi)	# V
m32_case2:
			movl	 -4(%esi),%eax	# U
			movl	 %edx,%ebx	# V	 Remember carry for later
			mull	 %ecx		 # NP
			addl	 %ebx,%eax	# U		Add carry in from previous word
			adcl	 $0,%edx		# U
			movl	 %eax,-4(%edi)	# V
m32_case1:
			movl	 (%esi),%eax		# U
			movl	 %edx,%ebx		# V	 Remember carry for later
			mull	 %ecx			# NP
			addl	 %ebx,%eax		# U	 Add carry in from previous word
			adcl	 $0,%edx			# U
			movl	 %eax,(%edi)		# V

			subl	 $4,%ebp			# U
			ja	m32_loop		# V

m32_done:
			movl	 %edx,4(%edi)	# U
			popl	 %edi			# V
			popl	 %ebx			# U
			popl	 %ebp			# V
			popl	 %esi			 # U
			ret				# NP


			.align	align16
bniMulAdd1_32:
_bniMulAdd1_32:

			pushl	%esi			# U
			movl	 12(%esp),%esi	# V	 load in
			pushl	%edi		# U
			movl	 12(%esp),%edi	# V	 load out
			pushl	%ebp		# U
			movl	 24(%esp),%ebp	# V	 load len
			pushl	%ebx		# U
			movl	 32(%esp),%ecx	# V	 load k

## First multiply step has no carry in.
			movl	 (%esi),%eax			# V
			movl	 (%edi),%ebx			# U
			mull	 %ecx		 		# NP	 first multiply
			addl	 %eax,%ebx			# U
			leal	 -4(,%ebp,4),%eax		# V	loop unrolling
			adcl	 $0,%edx				# U
			andl	 $12,%eax			# V	loop unrolling
			movl	 %ebx,(%edi)			# U

	addl	%eax,%esi		 # V	loop unrolling
	addl	%eax,%edi		 # U	loop unrolling

	jmp	*ma32_jumptable(%eax)	# NP	loop unrolling

	.align	align4
ma32_jumptable:
			.long	ma32_case0
			.long	ma32_case1
			.long	ma32_case2
			.long	ma32_case3

			.align	align8
			nop
			nop
			nop				# To align loop properly


	ma32_case0:
		subl	 $4,%ebp				# U
		jbe	ma32_done			# V

ma32_loop:
			movl	 4(%esi),%eax	# U
			movl	 %edx,%ebx		# V	 Remember carry for later
			addl	 $16,%esi		# U
			addl	 $16,%edi		# V
			mull	 %ecx			# NP
			addl	 %ebx,%eax		# U		Add carry in from previous word
			movl	 -12(%edi),%ebx	 # V
			adcl	 $0,%edx		# U
			addl	 %eax,%ebx	# V
			adcl	 $0,%edx		# U
			movl	 %ebx,-12(%edi)	 # V
ma32_case3:
			movl	 -8(%esi),%eax	# U
			movl	 %edx,%ebx	# V	 Remember carry for later
			mull	 %ecx		 # NP
			addl	 %ebx,%eax	# U			Add carry in from previous word
			movl	 -8(%edi),%ebx	# V
			adcl	 $0,%edx		# U
			addl	 %eax,%ebx	# V
			adcl	 $0,%edx		# U
			movl	 %ebx,-8(%edi)	# V
ma32_case2:
			movl	 -4(%esi),%eax	# U
			movl	 %edx,%ebx	# V	 Remember carry for later
			mull	 %ecx		 # NP
			addl	 %ebx,%eax	# U			Add carry in from previous word
			movl	 -4(%edi),%ebx	# V
			adcl	 $0,%edx		# U
			addl	 %eax,%ebx	# V
			adcl	 $0,%edx		# U
			movl	 %ebx,-4(%edi)	# V
ma32_case1:
		movl	 (%esi),%eax	# U
		movl	 %edx,%ebx	# V	 Remember carry for later
		mull	%ecx			# NP
		addl	%ebx,%eax		# U		Add carry in from previous word
		movl	(%edi),%ebx		# V
		adcl	$0,%edx			# U
		addl	%eax,%ebx		# V
		adcl	$0,%edx			# U
		movl	%ebx,(%edi)		# V

			subl	 $4,%ebp			# U
			ja	ma32_loop		# V

ma32_done:
			popl	 %ebx			# U
			popl	 %ebp			# V
			movl	 %edx,%eax		# U
			popl	 %edi			# V
			popl	 %esi			 # U
			ret				# NP


		.align align16
bniMulSub1_32:
_bniMulSub1_32:
			pushl	%esi			# U
			movl	 12(%esp),%esi	# V	 load in
			pushl	%edi		# U
			movl	 12(%esp),%edi	# V	 load out
			pushl	%ebp		# U
			movl	 24(%esp),%ebp	# V	 load len
			pushl	%ebx		# U
			movl	 32(%esp),%ecx	# V	 load k

/* First multiply step has no carry in. */
			movl	 (%esi),%eax			# V
			movl	 (%edi),%ebx			# U
			mull	 %ecx		 		# NP	 first multiply
			subl	 %eax,%ebx			# U
			leal	 -4(,%ebp,4),%eax		# V	loop unrolling
			adcl	 $0,%edx				 # U
			andl	 $12,%eax			 # V	loop unrolling
			movl	 %ebx,(%edi)			# U

			addl	 %eax,%esi			# V	loop unrolling
			addl	 %eax,%edi			# U	loop unrolling

			jmp	*ms32_jumptable(%eax)	# NP	 loop unrolling

			.align	align4
ms32_jumptable:
			.long	ms32_case0
			.long	ms32_case1
			.long	ms32_case2
			.long	ms32_case3

.align	align8
nop
nop
nop

ms32_case0:
	subl	$4,%ebp		# U
	jbe	ms32_done	# V

ms32_loop:
			movl	 4(%esi),%eax	# U
			movl	 %edx,%ebx		# V	 Remember carry for later
			addl	 $16,%esi		# U
			addl	 $16,%edi		# V
			mull	 %ecx			# NP
			addl	 %ebx,%eax		# U		Add carry in from previous word
			movl	 -12(%edi),%ebx	 # V
			adcl	 $0,%edx		# U
			subl	 %eax,%ebx	# V
			adcl	 $0,%edx		# U
			movl	 %ebx,-12(%edi)	 # V
ms32_case3:
			movl	 -8(%esi),%eax	# U
			movl	 %edx,%ebx	# V	 Remember carry for later
			mull	 %ecx		 # NP
			addl	 %ebx,%eax	# U			Add carry in from previous word
			movl	 -8(%edi),%ebx	# V
			adcl	 $0,%edx		# U
			subl	 %eax,%ebx	# V
			adcl	 $0,%edx		# U
			movl	 %ebx,-8(%edi)	# V
ms32_case2:
			movl	 -4(%esi),%eax	# U
			movl	 %edx,%ebx	# V	 Remember carry for later
			mull	 %ecx		 # NP
			addl	 %ebx,%eax	# U			Add carry in from previous word
			movl	 -4(%edi),%ebx	# V
			adcl	 $0,%edx		# U
			subl	 %eax,%ebx	# V
			adcl	 $0,%edx		# U
			movl	 %ebx,-4(%edi)	# V
ms32_case1:
			movl	 (%esi),%eax		# U
			movl	 %edx,%ebx		# V	 Remember carry for later
			mull	 %ecx			# NP
			addl	 %ebx,%eax		# U		Add carry in from previous word
			movl	 (%edi),%ebx		# V
			adcl	 $0,%edx			# U
			subl	 %eax,%ebx		# V
			adcl	 $0,%edx			# U
			movl	 %ebx,(%edi)		# V

			subl	 $4,%ebp			# U
			ja	ms32_loop		# V

ms32_done:
			popl	 %ebx			# U
			popl	 %ebp			# V
			movl	 %edx,%eax		# U
			popl	 %edi			# V
			popl	 %esi			# U
			ret				# NP

## Two-word by one-word divide. Stores quotient, returns remainder.
## BNWORD32 bniDiv21_32(BNWORD32 *q, BNWORD32 nh, BNWORD32 nl, BNWORD32 d)
##	4	8		12	16

	.align align16
bniDiv21_32:
_bniDiv21_32:
			movl	 8(%esp),%edx	# U			Load nh
			movl	 12(%esp),%eax	# V	 Load nl
			movl	 4(%esp),%ecx	# U	Load q
			divl	 16(%esp)	# NP
			movl	 %eax,(%ecx)	# U	Store quotient
			movl	 %edx,%eax	# V	 Return remainder
			ret

## Multi-word by one-word remainder.
## This speeds up key generation. It's not worth unrolling and so on;
## using 32-bit divides is enough of a speedup.
##
## The modulus (in %ebp) is often 16 bits.  Given that the dividend is 32
## bits, the chances of saving the first divide because the high word of the
## dividend is less than the modulus are low enough it's not worth taking
## the cycles to test for it.
##
## unsigned bniModQ_32(BNWORD32 const *n, unsigned len, unsigned d)
##	4		8	12
	.align align16
bniModQ_32:
_bniModQ_32:
			movl	 4(%esp),%eax				# U		Load n
			pushl	%ebp					# V
			movl	 12(%esp),%ebp				# U		Load len
			pushl	%esi					# V
			leal	 -4(%eax,%ebp,4),%esi	# U
			movl	 20(%esp),%ecx			# V	Load d
			xorl	 %edx,%edx				# U		Clear MSW for first divide
modq32_loop:
			movl	 (%esi),%eax				# U
			subl	 $4,%esi					# V
			divl	 %ecx					# NP
			decl	 %ebp					# U
			jnz	modq32_loop				# V

			popl	 %esi					# U
			movl	 %edx,%eax				# V
			popl	 %ebp					# U
			ret						# NP
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -