;;; bni80386.asm
;;;
;;; From "PGP source code for the VC environment" -- assembly code, 435 lines.
;;; Assembly primitives for bignum library, 80386 family, 32-bit code.
;;;
;;; $Id: bni80386.asm,v 1.1 1997/12/14 11:30:22 wprice Exp $
;;;
;;; Several primitives are included here.  Only bniMulAdd1 is *really*
;;; critical, but once that's written, bniMulN1 and bniMulSub1 are quite
;;; easy to write as well, so they are included here as well.
;;; bniDiv21 and bniModQ are so easy to write that they're included, too.
;;;
;;; All functions here are for 32-bit flat mode.  I.e. near code and
;;; near data, although the near offsets are 32 bits.
;;;
;;; The usual 80x86 calling conventions have AX, BX, CX and DX
;;; volatile, and SI, DI, SP and BP preserved across calls.
;;; This includes the "E"xtended forms of all of those registers
;;; 
;;; However, just to be confusing, recent 32-bit DOS compilers have
;;; quietly changed that to require EBX preserved across calls, too.
;;; Joy.

;; Enable the 80386 instruction set for everything that follows.
.386
;_TEXT   segment para public use32 'CODE' ; 16-byte aligned because 486 cares
;_TEXT	ends

;; Define the FLAT group here when the assembler either does not define
;; @Version at all or reports a value of 510 or less.  For larger @Version
;; values nothing is defined here; presumably those assemblers supply FLAT
;; themselves -- TODO confirm against the assembler in use.
ifdef @Version
if @Version le 510
FLAT	group	_TEXT
endif
else
FLAT	group	_TEXT
endif
;; Code, data and stack are all addressed through the same FLAT group.
	assume	cs:FLAT, ds:FLAT, ss:FLAT
;; Open the 32-bit code segment; "para" requests paragraph alignment.
_TEXT   segment para public use32 'CODE' ; 16-byte aligned because 486 cares

;; Entry points exported from this module.  The leading underscore is part
;; of each symbol name as written here.
	public  _bniMulN1_32
	public  _bniMulAdd1_32
	public  _bniMulSub1_32
	public	_bniDiv21_32
	public	_bniModQ_32

;; Register usage:
;; eax - low half of product
;; ebx - carry to next iteration
;; ecx - multiplier (k)
;; edx - high half of product
;; esi - source pointer
;; edi - dest pointer
;; ebp - loop counter
;;
;; Stack frame:
;; +--------+ esp+20  esp+24  esp+28  esp+32  esp+36
;; |    k   |
;; +--------+ esp+16  esp+20  esp+24  esp+28  esp+32
;; |   len  |
;; +--------+ esp+12  esp+16  esp+20  esp+24  esp+28
;; |   in   |
;; +--------+ esp+8   esp+12  esp+16  esp+20  esp+24
;; |   out  |
;; +--------+ esp+4   esp+8   esp+12  esp+16  esp+20
;; | return |
;; +--------+ esp     esp+4   esp+8   esp+12  esp+16
;; |   esi  |
;; +--------+         esp     esp+4   esp+8   esp+12
;; |   ebp  |
;; +--------+                 esp     esp+4   esp+8
;; |   ebx  |
;; +--------+                         esp     esp+4
;; |   edi  |
;; +--------+                                 esp

	align	16
;; _bniMulN1_32 -- multiply a multi-word number by a single word.
;;
;; Stack arguments on entry: out at [esp+4], in at [esp+8], len at [esp+12],
;; k at [esp+16] (see the stack frame diagram above).
;;
;; Computes in[0..len-1] * k.  The low len words are stored to
;; out[0..len-1] and the final carry word is stored to out[len], so this
;; routine writes len+1 words through out.
;;
;; The first word is multiplied before any length test, so the code as
;; written expects len >= 1.
;;
;; esi, ebp, ebx and edi are saved and restored; eax, ecx, edx and the flags
;; are overwritten.  No result is deliberately placed in eax.
;;
;; The "U" / "V" / "NP" tags in the instruction comments presumably record
;; Pentium pairing (U pipe, V pipe, not pairable) -- they do not affect the
;; assembled code.
_bniMulN1_32	proc	near

	push	esi		; U
	mov	esi,[esp+12]	;  V	load in
	push	ebp		; U
	mov	ebp,[esp+20]	;  V	load len
	push	ebx		; U
	mov	ecx,[esp+28]	;  V	load k
	push	edi		; U
	mov	edi,[esp+20]	;  V	load out

;; First multiply step has no carry in.
	mov	eax,[esi]	; U
	lea	ebx,[ebp*4-4]	;  V	loop unrolling: ebx = 4*(len-1)
	mul	ecx		; NP	first multiply
	mov	[edi],eax	; U
	and	ebx,12		;  V	loop unrolling: ebx = 4*((len-1) mod 4)

;; Advance both pointers past the words handled by the partial first pass,
;; so the negative offsets used at the case labels address the right words.
	add	esi,ebx		; U	loop unrolling
	add	edi,ebx		;  V	loop unrolling

;; ebx is already a byte offset into the table of 4-byte entries.
	jmp	DWORD PTR m32_jumptable[ebx]	; NP	loop unrolling

	align	4
m32_jumptable:
	dd	m32_case0
	dd	m32_case1
	dd	m32_case2
	dd	m32_case3

	nop
	align	8
	nop
	nop
	nop	; Get loop nicely aligned

;; (len-1) is a multiple of 4: nothing to do in a partial pass.  If no
;; full group of four remains either, go straight to the final store.
m32_case0:
	sub	ebp,4		; U
	jbe	SHORT m32_done	;  V

;; Main loop: four words per iteration.  edx carries the high half of the
;; previous product into the next step via ebx.
m32_loop:
	mov	eax,[esi+4]	; U
	mov	ebx,edx		;  V	Remember carry for later
	add	esi,16		; U
	add	edi,16		;  V
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	adc	edx,0		; U
	mov	[edi-12],eax	;  V
m32_case3:
	mov	eax,[esi-8]	; U
	mov	ebx,edx		;  V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	adc	edx,0		; U
	mov	[edi-8],eax	;  V
m32_case2:
	mov	eax,[esi-4]	; U
	mov	ebx,edx		;  V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	adc	edx,0		; U
	mov	[edi-4],eax	;  V
m32_case1:
	mov	eax,[esi]	; U
	mov	ebx,edx		;  V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	adc	edx,0		; U
	mov	[edi],eax	;  V

;; Keep looping while the unsigned subtraction neither borrows nor hits 0.
	sub	ebp,4		; U
	ja	SHORT m32_loop	;  V

;; edi points at the last word written; the final carry goes one word above.
m32_done:
	mov	[edi+4],edx	; U
	pop	edi		;  V
	pop	ebx		; U
	pop	ebp		;  V
	pop	esi		; U
	ret			; NP
_bniMulN1_32	endp


	align	16
;; _bniMulAdd1_32 -- multiply by a single word and accumulate.
;;
;; Stack arguments on entry: out at [esp+4], in at [esp+8], len at [esp+12],
;; k at [esp+16].
;;
;; Adds in[0..len-1] * k into out[0..len-1] in place and returns, in eax,
;; the carry word left over after the most significant word.
;;
;; The first word is processed before any length test, so the code as
;; written expects len >= 1.
;;
;; esi, edi, ebp and ebx are saved and restored; ecx, edx and the flags are
;; overwritten.
;;
;; The "U" / "V" / "NP" tags in the instruction comments presumably record
;; Pentium pairing (U pipe, V pipe, not pairable) -- they do not affect the
;; assembled code.
_bniMulAdd1_32	proc	near

	push	esi		; U
	mov	esi,[esp+12]	;  V	load in
	push	edi		; U
	mov	edi,[esp+12]	;  V	load out
	push	ebp		; U
	mov	ebp,[esp+24]	;  V	load len
	push	ebx		; U
	mov	ecx,[esp+32]	;  V	load k

;; First multiply step has no carry in.
	mov	eax,[esi]	; U
	mov	ebx,[edi]	;  V
	mul	ecx		; NP	first multiply
	add	ebx,eax		; U	out[0] + low half; CF = carry out
	lea	eax,[ebp*4-4]	;  V	loop unrolling: 4*(len-1); lea leaves CF intact
	adc	edx,0		; U	fold that carry into the high half
	and	eax,12		;  V	loop unrolling: 4*((len-1) mod 4)
	mov	[edi],ebx	; U

;; Advance both pointers past the words handled by the partial first pass,
;; so the negative offsets used at the case labels address the right words.
	add	esi,eax		;  V	loop unrolling
	add	edi,eax		; U	loop unrolling

;; eax is already a byte offset into the table of 4-byte entries.
	jmp	DWORD PTR ma32_jumptable[eax]	; NP	loop unrolling

	align	4
ma32_jumptable:
	dd	ma32_case0
	dd	ma32_case1
	dd	ma32_case2
	dd	ma32_case3

	nop
	align	8
	nop
	nop
	nop			; To align loop properly


;; (len-1) is a multiple of 4: nothing to do in a partial pass.  If no
;; full group of four remains either, return the carry immediately.
ma32_case0:
	sub	ebp,4		; U
	jbe	SHORT ma32_done	;  V

;; Main loop: four words per iteration.  Each step adds the incoming carry
;; to the low product half, then adds that to the destination word; both
;; additions can carry, hence the two "adc edx,0".
ma32_loop:
	mov	eax,[esi+4]	; U
	mov	ebx,edx		;  V	Remember carry for later
	add	esi,16		; U
	add	edi,16		;  V
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-12]	;  V
	adc	edx,0		; U
	add	ebx,eax		;  V
	adc	edx,0		; U
	mov	[edi-12],ebx	;  V
ma32_case3:
	mov	eax,[esi-8]	; U
	mov	ebx,edx		;  V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-8]	;  V
	adc	edx,0		; U
	add	ebx,eax		;  V
	adc	edx,0		; U
	mov	[edi-8],ebx	;  V
ma32_case2:
	mov	eax,[esi-4]	; U
	mov	ebx,edx		;  V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-4]	;  V
	adc	edx,0		; U
	add	ebx,eax		;  V
	adc	edx,0		; U
	mov	[edi-4],ebx	;  V
ma32_case1:
	mov	eax,[esi]	; U
	mov	ebx,edx		;  V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi]	;  V
	adc	edx,0		; U
	add	ebx,eax		;  V
	adc	edx,0		; U
	mov	[edi],ebx	;  V

;; Keep looping while the unsigned subtraction neither borrows nor hits 0.
	sub	ebp,4		; U
	ja	SHORT ma32_loop	;  V

;; Restore registers in reverse push order and return the carry word.
ma32_done:
	pop	ebx		; U
	pop	ebp		;  V
	mov	eax,edx		; U
	pop	edi		;  V
	pop	esi		; U
	ret			; NP
_bniMulAdd1_32	endp


	align	16
;; _bniMulSub1_32 -- multiply by a single word and subtract.
;;
;; Stack arguments on entry: out at [esp+4], in at [esp+8], len at [esp+12],
;; k at [esp+16].
;;
;; Subtracts in[0..len-1] * k from out[0..len-1] in place and returns, in
;; eax, the borrow word left over after the most significant word.
;;
;; The first word is processed before any length test, so len must be >= 1.
;;
;; esi, edi, ebp and ebx are saved and restored; ecx, edx and the flags are
;; overwritten.
;;
;; Exactly four registers are pushed here and four are popped at ms32_done.
;; The prologue must stay a single copy: any extra push leaves the stack
;; unbalanced at "ret" and shifts every [esp+N] argument offset.
_bniMulSub1_32	proc	near
	push	esi		; U
	mov	esi,[esp+12]	;  V	load in
	push	edi		; U
	mov	edi,[esp+12]	;  V	load out
	push	ebp		; U
	mov	ebp,[esp+24]	;  V	load len
	push	ebx		; U
	mov	ecx,[esp+32]	;  V	load k

;; First multiply step has no carry in.
	mov	eax,[esi]	; U
	mov	ebx,[edi]	;  V
	mul	ecx		; NP	first multiply
	sub	ebx,eax		; U	out[0] - low half; CF = borrow out
	lea	eax,[ebp*4-4]	;  V	loop unrolling: 4*(len-1); lea leaves CF intact
	adc	edx,0		; U	fold that borrow into the high half
	and	eax,12		;  V	loop unrolling: 4*((len-1) mod 4)
	mov	[edi],ebx	; U

;; Advance both pointers past the words handled by the partial first pass,
;; so the negative offsets used at the case labels address the right words.
	add	esi,eax		;  V	loop unrolling
	add	edi,eax		; U	loop unrolling

;; eax is already a byte offset into the table of 4-byte entries.
	jmp	DWORD PTR ms32_jumptable[eax]	; NP	loop unrolling

	align	4
ms32_jumptable:
	dd	ms32_case0
	dd	ms32_case1
	dd	ms32_case2
	dd	ms32_case3

	nop
	align	8
	nop
	nop
	nop			; To align loop properly

;; (len-1) is a multiple of 4: nothing to do in a partial pass.  If no
;; full group of four remains either, return the borrow immediately.
ms32_case0:
	sub	ebp,4		; U
	jbe	SHORT ms32_done	;  V

;; Main loop: four words per iteration.  Each step adds the incoming borrow
;; word to the low product half (may carry), then subtracts that from the
;; destination word (may borrow); both events increment edx.
ms32_loop:
	mov	eax,[esi+4]	; U
	mov	ebx,edx		;  V	Remember carry for later
	add	esi,16		; U
	add	edi,16		;  V
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-12]	;  V
	adc	edx,0		; U
	sub	ebx,eax		;  V
	adc	edx,0		; U
	mov	[edi-12],ebx	;  V
ms32_case3:
	mov	eax,[esi-8]	; U
	mov	ebx,edx		;  V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-8]	;  V
	adc	edx,0		; U
	sub	ebx,eax		;  V
	adc	edx,0		; U
	mov	[edi-8],ebx	;  V
ms32_case2:
	mov	eax,[esi-4]	; U
	mov	ebx,edx		;  V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-4]	;  V
	adc	edx,0		; U
	sub	ebx,eax		;  V
	adc	edx,0		; U
	mov	[edi-4],ebx	;  V
ms32_case1:
	mov	eax,[esi]	; U
	mov	ebx,edx		;  V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi]	;  V
	adc	edx,0		; U
	sub	ebx,eax		;  V
	adc	edx,0		; U
	mov	[edi],ebx	;  V

;; Keep looping while the unsigned subtraction neither borrows nor hits 0.
	sub	ebp,4		; U
	ja	SHORT ms32_loop	;  V

;; Restore registers in reverse push order and return the borrow word.
ms32_done:
	pop	ebx		; U
	pop	ebp		;  V
	mov	eax,edx		; U
	pop	edi		;  V
	pop	esi		; U
	ret			; NP
_bniMulSub1_32	endp



;; Two-word by one-word divide.  Stores quotient, returns remainder.
;; BNWORD32 bniDiv21_32(BNWORD32 *q, BNWORD32 nh, BNWORD32 nl, BNWORD32 d)
;;                      4            8            12           16
align 4
;; Leaf routine: no registers are saved, so the arguments stay at their
;; entry offsets ([esp+4] q, [esp+8] nh, [esp+12] nl, [esp+16] d).
;; DIV raises a divide error when the quotient does not fit in 32 bits,
;; so the caller must pass nh < d.
_bniDiv21_32	proc	near
	mov	eax,[esp+12]		; eax = nl, low word of the dividend
	mov	edx,[esp+8]		; edx = nh, high word of the dividend
	div	DWORD PTR [esp+16]	; eax = edx:eax / d, edx = edx:eax % d
	mov	ecx,[esp+4]		; ecx = q
	mov	[ecx],eax		; *q = quotient
	mov	eax,edx			; return value = remainder
	ret
_bniDiv21_32	endp

;; Multi-word by one-word remainder.
;; This speeds up key generation.  It's not worth unrolling and so on;
;; using 32-bit divides is enough of a speedup.
;;
;; The modulus (in ecx) is often 16 bits.  Given that the dividend is 32
;; bits, the chances of saving the first divide because the high word of the
;; dividend is less than the modulus are low enough it's not worth taking
;; the cycles to test for it.
;;
;; unsigned bniModQ_32(BNWORD32 const *n, unsigned len, unsigned d)
;;                     4                  8             12
align 4
;; Stack arguments on entry: n at [esp+4], len at [esp+8], d at [esp+12].
;; Returns in eax the remainder of the len-word number n[0..len-1] divided
;; by d.  Words are consumed from n[len-1] down to n[0], with each step's
;; remainder becoming the high word of the next dividend.
;;
;; The loop body runs before the count is tested, so len must be >= 1, and
;; d must be non-zero or DIV raises a divide error.
;;
;; ebp and esi are saved and restored; ecx, edx and the flags are
;; overwritten.
_bniModQ_32	proc	near
	mov	eax,[esp+4]		; U	Load n
	push	ebp			;  V
	mov	ebp,[esp+12]		; U	Load len
	push	esi			;  V
	lea	esi,[ebp*4+eax-4]	; U	esi = &n[len-1], the most significant word
	mov	ecx,[esp+20]		;  V	Load d
	xor	edx,edx			; U	Clear edx for first iteration
modq32_loop:
	mov	eax,[esi]		; U	Load new low word for divide
	sub	esi,4			;  V	Step down to the next lower word
	div	ecx			; NP	edx = edx:eax % ecx
	dec	ebp			; U
	jnz	SHORT modq32_loop	;  V

	pop	esi			; U
	mov	eax,edx			;  V	Return remainder in eax
	pop	ebp			; U
	ret				; NP
_bniModQ_32	endp

_TEXT	ends

	end
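;;; bni80386.asm - source notes
;;;
;;; This page shows the bni80386.asm source file from "PGP source code for
;;; the VC environment".  It is written in assembly language and is 435
;;; lines long.  The full code can be read online, or the complete source
;;; package can be downloaded from the resource page for local study and
;;; development.
;;; The hosting download site collects many PGP-related technical resources,
;;; including source code, technical documents and circuit diagrams, and is
;;; aimed at electronics engineers and embedded developers.
;;; Keyboard shortcuts:
;;;
;;;   Copy code              Ctrl + C
;;;   Search code            Ctrl + F
;;;   Full-screen mode       F11
;;;   Increase font size     Ctrl + =
;;;   Decrease font size     Ctrl + -
;;;   Show shortcuts         ?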