;;; File: bni80386.asm
;;; (Archive description: well-known encryption software, as applied to e-mail.)
;;; Language: ASM
;;;
;;; Assembly primitives for bignum library, 80386 family, 32-bit code.
;;;
;;; Copyright (C) 1995-1997 Pretty Good Privacy, Inc. All rights reserved.
;;;
;;; $Id: bni80386.asm,v 1.5.2.1 1997/06/07 09:49:32 mhw Exp $
;;;
;;; Several primitives are included here. Only bniMulAdd1 is *really*
;;; critical, but once that's written, bniMulN1 and bniMulSub1 are quite
;;; easy to write as well, so they are included here as well.
;;; bniDiv21 and bniModQ are so easy to write that they're included, too.
;;;
;;; All functions here are for 32-bit flat mode. I.e. near code and
;;; near data, although the near offsets are 32 bits.
;;;
;;; The usual 80x86 calling conventions have AX, BX, CX and DX
;;; volatile, and SI, DI, SP and BP preserved across calls.
;;; This includes the "E"xtended forms of all of those registers
;;;
;;; However, just to be confusing, recent 32-bit DOS compilers have
;;; quietly changed that to require EBX preserved across calls, too.
;;; Joy.

;; Enable the 80386 instruction set for everything that follows.
.386
;_TEXT segment para public use32 'CODE' ; 16-byte aligned because 486 cares
;_TEXT	ends

;; Declare FLAT as a group containing _TEXT when @Version is either not
;; defined at all or is defined with a value of 510 or less.  When
;; @Version is defined and greater than 510, no group is declared here.
;; NOTE(review): presumably newer assembler versions already provide
;; FLAT themselves -- confirm against the assembler documentation.
ifdef @Version
if @Version le 510
FLAT	group	 _TEXT
endif
else
FLAT	group	 _TEXT
endif
;; Code, data and stack references are all assumed to go through FLAT.
	assume	cs:FLAT, ds:FLAT, ss:FLAT
;; Open the 32-bit code segment that holds every routine in this file;
;; it is closed by the matching "_TEXT ends" at the bottom.
_TEXT segment para public use32 'CODE' ; 16-byte aligned because 486 cares

;; Entry points exported from this file.
			public _bniMulN1_32
			public _bniMulAdd1_32
			public _bniMulSub1_32
			public	_bniDiv21_32
			public	_bniModQ_32

;; Register usage:
;; eax - low half of product
;; ebx - carry to next iteration
;; ecx - multiplier (k)
;; edx - high half of product
;; esi - source pointer
;; edi - dest pointer
;; ebp - loop counter
;;
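;; Stack frame.  The line beneath each slot gives that slot's address.
;; The five columns are: on entry, then after the 1st, 2nd, 3rd and 4th
;; register push.  The saved-register order shown is the one used by
;; _bniMulN1_32 (esi, ebp, ebx, edi); _bniMulAdd1_32 saves the same four
;; registers in the order esi, edi, ebp, ebx.
;;
;; +--------+ esp+20  esp+24  esp+28  esp+32  esp+36
;; |   k    |
;; +--------+ esp+16  esp+20  esp+24  esp+28  esp+32
;; |  len   |
;; +--------+ esp+12  esp+16  esp+20  esp+24  esp+28
;; |   in   |
;; +--------+ esp+8   esp+12  esp+16  esp+20  esp+24
;; |  out   |
;; +--------+ esp+4   esp+8   esp+12  esp+16  esp+20
;; | return |
;; +--------+ esp     esp+4   esp+8   esp+12  esp+16
;; |  esi   |
;; +--------+         esp     esp+4   esp+8   esp+12
;; |  ebp   |
;; +--------+                 esp     esp+4   esp+8
;; |  ebx   |
;; +--------+                         esp     esp+4
;; |  edi   |
;; +--------+                                 esp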

		align	16
_bniMulN1_32	proc	near
;; Multiply the len-word number at `in` by the single word k and store
;; the product, least significant word first, at `out`.  The carry out
;; of the last word is written to out[len], so len+1 words are stored.
;;
;; Stack arguments, in order: out, in, len, k.  Nothing is returned.
;; esi, ebp, ebx and edi are saved and restored; eax, ecx and edx are
;; overwritten.
;;
;; Word 0 is multiplied unconditionally, so len is presumably expected
;; to be at least 1 -- confirm against the callers.
;;
;; The loop is unrolled four times.  The (len-1) mod 4 odd words are
;; handled first by jumping into the middle of the loop body.
;; U / V / NP are the original author's instruction-pairing notes.

	push	esi			; U
	mov	esi,[esp+12]		; V	esi = in
	push	ebp			; U
	mov	ebp,[esp+20]		; V	ebp = len
	push	ebx			; U
	mov	ecx,[esp+28]		; V	ecx = k
	push	edi			; U
	mov	edi,[esp+20]		; V	edi = out

;; Word 0: there is no carry to add in yet.
	mov	eax,[esi]		; U	eax = in[0]
	lea	ebx,[ebp*4-4]		; V	ebx = 4*(len-1)
	mul	ecx			; NP	edx:eax = in[0] * k
	mov	[edi],eax		; U	out[0] = low half
	and	ebx,12			; V	ebx = 4*((len-1) mod 4)

	add	esi,ebx			; U	advance both pointers past the
	add	edi,ebx			; V	words of the partial first pass

	jmp	DWORD PTR mn1_entry[ebx]	; NP	enter the loop part-way

	align	4
mn1_entry:
	dd	mn1_left0
	dd	mn1_left1
	dd	mn1_left2
	dd	mn1_left3

	nop
	align	8
	nop
	nop
	nop				; padding: get the loop nicely aligned

mn1_left0:
	sub	ebp,4			; U	fewer than 5 words in total?
	jbe	SHORT mn1_store_top	; V	then only the carry is left

mn1_quad:
	mov	eax,[esi+4]		; U	next source word
	mov	ebx,edx			; V	ebx = carry from the previous word
	add	esi,16			; U	pointers move on by four words
	add	edi,16			; V
	mul	ecx			; NP	edx:eax = word * k
	add	eax,ebx			; U	fold the carry into the low half
	adc	edx,0			; U	and ripple it into the high half
	mov	[edi-12],eax		; V
mn1_left3:
	mov	eax,[esi-8]		; U
	mov	ebx,edx			; V	carry from the previous word
	mul	ecx			; NP
	add	eax,ebx			; U
	adc	edx,0			; U
	mov	[edi-8],eax		; V
mn1_left2:
	mov	eax,[esi-4]		; U
	mov	ebx,edx			; V	carry from the previous word
	mul	ecx			; NP
	add	eax,ebx			; U
	adc	edx,0			; U
	mov	[edi-4],eax		; V
mn1_left1:
	mov	eax,[esi]		; U
	mov	ebx,edx			; V	carry from the previous word
	mul	ecx			; NP
	add	eax,ebx			; U
	adc	edx,0			; U
	mov	[edi],eax		; V

	sub	ebp,4			; U	four more words accounted for
	ja	SHORT mn1_quad		; V	loop while words remain

mn1_store_top:
	mov	[edi+4],edx		; U	out[len] = final carry word
	pop	edi			; V
	pop	ebx			; U
	pop	ebp			; V
	pop	esi			; U
	ret				; NP
_bniMulN1_32	endp


	align	16
_bniMulAdd1_32	proc	near
;; Multiply the len-word number at `in` by the single word k and add
;; the product into the len words at `out`, least significant word
;; first.  The carry out of the last word is returned in eax; it is
;; not stored to memory.
;;
;; Stack arguments, in order: out, in, len, k.
;; esi, edi, ebp and ebx are saved and restored; ecx and edx are
;; overwritten.
;;
;; Word 0 is processed unconditionally, so len is presumably expected
;; to be at least 1 -- confirm against the callers.
;;
;; The loop is unrolled four times.  The (len-1) mod 4 odd words are
;; handled first by jumping into the middle of the loop body.
;; U / V / NP are the original author's instruction-pairing notes.

	push	esi			; U
	mov	esi,[esp+12]		; V	esi = in
	push	edi			; U
	mov	edi,[esp+12]		; V	edi = out
	push	ebp			; U
	mov	ebp,[esp+24]		; V	ebp = len
	push	ebx			; U
	mov	ecx,[esp+32]		; V	ecx = k

;; Word 0: there is no carry to add in yet.
	mov	eax,[esi]		; U	eax = in[0]
	mov	ebx,[edi]		; V	ebx = out[0]
	mul	ecx			; NP	edx:eax = in[0] * k
	add	ebx,eax			; U	ebx = out[0] + low half
	lea	eax,[ebp*4-4]		; V	eax = 4*(len-1); lea leaves CF alone
	adc	edx,0			; U	carry of that add goes to the high half
	and	eax,12			; V	eax = 4*((len-1) mod 4)
	mov	[edi],ebx		; U

	add	esi,eax			; V	advance both pointers past the
	add	edi,eax			; U	words of the partial first pass

	jmp	DWORD PTR ma1_entry[eax]	; NP	enter the loop part-way

	align	4
ma1_entry:
	dd	ma1_left0
	dd	ma1_left1
	dd	ma1_left2
	dd	ma1_left3

	nop
	align	8
	nop
	nop
	nop				; padding: align the loop properly


ma1_left0:
	sub	ebp,4			; U	fewer than 5 words in total?
	jbe	SHORT ma1_finish	; V	then nothing is left to do

ma1_quad:
	mov	eax,[esi+4]		; U	next source word
	mov	ebx,edx			; V	ebx = carry from the previous word
	add	esi,16			; U	pointers move on by four words
	add	edi,16			; V
	mul	ecx			; NP	edx:eax = word * k
	add	eax,ebx			; U	fold the carry into the low half
	mov	ebx,[edi-12]		; V	ebx = destination word
	adc	edx,0			; U	carry from the carry-in add
	add	ebx,eax			; V	accumulate into the destination
	adc	edx,0			; U	carry from the accumulate
	mov	[edi-12],ebx		; V
ma1_left3:
	mov	eax,[esi-8]		; U
	mov	ebx,edx			; V	carry from the previous word
	mul	ecx			; NP
	add	eax,ebx			; U
	mov	ebx,[edi-8]		; V
	adc	edx,0			; U
	add	ebx,eax			; V
	adc	edx,0			; U
	mov	[edi-8],ebx		; V
ma1_left2:
	mov	eax,[esi-4]		; U
	mov	ebx,edx			; V	carry from the previous word
	mul	ecx			; NP
	add	eax,ebx			; U
	mov	ebx,[edi-4]		; V
	adc	edx,0			; U
	add	ebx,eax			; V
	adc	edx,0			; U
	mov	[edi-4],ebx		; V
ma1_left1:
	mov	eax,[esi]		; U
	mov	ebx,edx			; V	carry from the previous word
	mul	ecx			; NP
	add	eax,ebx			; U
	mov	ebx,[edi]		; V
	adc	edx,0			; U
	add	ebx,eax			; V
	adc	edx,0			; U
	mov	[edi],ebx		; V

	sub	ebp,4			; U	four more words accounted for
	ja	SHORT ma1_quad		; V	loop while words remain

ma1_finish:
	pop	ebx			; U
	pop	ebp			; V
	mov	eax,edx			; U	return the final carry word
	pop	edi			; V
	pop	esi			; U
	ret				; NP
_bniMulAdd1_32	endp


	align	 16
_bniMulSub1_32	proc	near
;; Multiply the len-word number at `in` by the single word k and
;; subtract the product from the len words at `out`, least significant
;; word first.  The word that could not be subtracted (the borrow out
;; of the last word) is returned in eax; it is not stored to memory.
;;
;; Stack arguments, in order: out, in, len, k.
;; esi, edi, ebp and ebx are saved and restored; ecx and edx are
;; overwritten.
;;
;; Word 0 is processed unconditionally, so len is presumably expected
;; to be at least 1 -- confirm against the callers.
;;
;; The loop is unrolled four times.  The (len-1) mod 4 odd words are
;; handled first by jumping into the middle of the loop body.
;; U / V / NP are the original author's instruction-pairing notes.

;; Exactly four registers are pushed here, matching the four pops at
;; ms32_done; the argument offsets below depend on that push count.
			push	 esi				; U
			mov	esi,[esp+12]	; V	load in
			push	 edi		; U
			mov	edi,[esp+12]	; V	load out
			push	 ebp		; U
			mov	ebp,[esp+24]	; V	load len
			push	 ebx		; U
			mov	ecx,[esp+32]	; V	load k

;; First multiply step has no carry in.
			mov		eax,[esi]	; V
			mov		ebx,[edi]	; U
			mul		ecx	 	; NP	 first multiply
			sub		ebx,eax		; U	out[0] - low half
			lea		eax,[ebp*4-4]	; V	loop unrolling (lea leaves CF alone)
			adc		edx,0			; U	borrow is added to the word carried on
			and		eax,12			; V	loop unrolling
			mov		[edi],ebx		; U

			add		esi,eax			; V	loop unrolling
			add		edi,eax			; U	loop unrolling

	jmp	DWORD PTR ms32_jumptable[eax]	; NP	loop unrolling

	align	 4
ms32_jumptable:
			dd		ms32_case0
			dd		ms32_case1
			dd		ms32_case2
			dd		ms32_case3

			nop
			align	8
			nop
			nop
			nop

ms32_case0:
		sub			ebp,4		; U
		jbe	SHORT ms32_done	; V

ms32_loop:
			mov		eax,[esi+4]	 ; U
			mov		ebx,edx		; V	Remember carry for later
			add		esi,16		; U
			add		edi,16		; V
			mul		ecx	 	; NP
			add		eax,ebx			; U		Add carry in from previous word
			mov		ebx,[edi-12]	; V
			adc		edx,0		 ; U
			sub		ebx,eax		; V
			adc		edx,0		 ; U	Borrow joins the carry
			mov		[edi-12],ebx	; V
ms32_case3:
			mov		eax,[esi-8]		; U
			mov		ebx,edx			; V	Remember carry for later
			mul		ecx	 		; NP
			add		eax,ebx			; U		Add carry in from previous word
			mov		ebx,[edi-8]		; V
			adc		edx,0			; U
			sub		ebx,eax			; V
			adc		edx,0			; U
			mov		[edi-8],ebx		; V
ms32_case2:
			mov		eax,[esi-4]		; U
			mov		ebx,edx			; V	Remember carry for later
			mul		ecx	 		; NP
			add		eax,ebx			; U		Add carry in from previous word
			mov		ebx,[edi-4]		; V
			adc		edx,0			; U
			sub		ebx,eax			; V
			adc		edx,0			; U
			mov		[edi-4],ebx		; V
ms32_case1:
			mov		eax,[esi]		; U
			mov		ebx,edx			; V	Remember carry for later
			mul		ecx	 		; NP
			add		eax,ebx			; U	Add carry in from previous word
			mov		ebx,[edi]		; V
			adc		edx,0			; U
			sub		ebx,eax			; V
			adc		edx,0			; U
			mov		[edi],ebx		; V

			sub		ebp,4			; U
			ja		SHORT ms32_loop	; V

ms32_done:
			pop		ebx	 		; U
			pop		ebp	 		; V
			mov		eax,edx			; U	Return the final borrow word
			pop		edi	 		; V
			pop		esi	 		; U
			ret					; NP
_bniMulSub1_32	endp



;; BNWORD32 bniDiv21_32(BNWORD32 *q, BNWORD32 nh, BNWORD32 nl, BNWORD32 d)
;;
;; Divide the two-word value nh:nl by the single word d.  The quotient
;; is stored through q and the remainder is returned in eax.
;;
;; Stack offsets on entry: q = esp+4, nh = esp+8, nl = esp+12, d = esp+16.
;; No registers are saved; eax, ecx and edx are overwritten.
;;
;; The x86 div instruction raises a divide error when the quotient does
;; not fit in 32 bits, so callers need nh < d (and d nonzero).
align 4
_bniDiv21_32	proc	near
	mov	ecx,[esp+4]		; ecx = q, where the quotient goes
	mov	edx,[esp+8]		; edx = nh, upper half of the dividend
	mov	eax,[esp+12]		; eax = nl, lower half of the dividend
	div	DWORD PTR [esp+16]	; eax = edx:eax / d, edx = edx:eax mod d
	mov	[ecx],eax		; *q = quotient
	mov	eax,edx			; hand back the remainder
	ret
_bniDiv21_32	endp

;; Multi-word by one-word remainder.
;; This speeds up key generation. It's not worth unrolling and so on;
;; using 32-bit divides is enough of a speedup.
;;
;; The modulus (in ecx) is often 16 bits.  Given that the dividend is 32
;; bits, the chances of saving the first divide because the high word of the
;; dividend is less than the modulus are low enough it's not worth taking
;; the cycles to test for it.
;;
;; unsigned bniModQ_32(BNWORD32 const *n, unsigned len, unsigned d)
;;	4		8		12
;;
;; Returns n mod d in eax, where n is len words long with the least
;; significant word first.  The words are consumed from n[len-1] down
;; to n[0]; each divide leaves its remainder in edx, which is the high
;; half of the next dividend.
;;
;; ebp and esi are saved and restored; ecx and edx are overwritten.
;; The loop body runs before len is tested, so len is presumably
;; expected to be at least 1 -- confirm against the callers.
align 4
_bniModQ_32	 proc	near
			mov	eax,[esp+4]				; U		Load n
			push	 ebp					; V
			mov	ebp,[esp+12]				; U		Load len (one push so far: +8 -> +12)
			push	 esi					; V
			lea		esi,[ebp*4+eax-4]		; U	esi = &n[len-1], the top word
			mov		ecx,[esp+20]			; V	 Load d (two pushes so far: +12 -> +20)
			xor		edx,edx					; U	 Clear edx for first iteration
modq32_loop:
		mov			eax,[esi]				; U	Load new low word for divide
		sub			esi,4					; V	Step down to the next lower word
		div			ecx					; NP	edx = edx:eax % ecx
		dec			ebp					; U
			jnz		SHORT modq32_loop		; V

			pop		esi	 			 ; U
			mov		eax,edx				; V	 Return remainder in eax
			pop		ebp	 			 ; U
			ret						; NP
_bniModQ_32	 endp

_TEXT	ends				; close the _TEXT code segment opened above

	end				; end of the assembly source
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -