⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 armv4-mont.pl

📁 OpenSSL 0.9.8k 最新版OpenSSL
💻 PL
字号:
#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# January 2007.

# Montgomery multiplication for ARMv4.
#
# Performance improvement naturally varies among CPU implementations
# and compilers. The code was observed to provide +65-35% improvement
# [depending on key length, less for longer keys] on ARM920T, and
# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
# base and compiler generated code with in-lined umull and even umlal
# instructions. The latter means that this code didn't really have an
# "advantage" of utilizing some "secret" instruction.
#
# The code is interoperable with Thumb ISA and is rather compact, less
# than 1/2KB. Windows CE port would be trivial, as it's exclusively
# about decorations, ABI and instruction syntax are identical.

# --------------------------------------------------------------------
# What this script does
#
# It builds the assembly text for a single routine, bn_mul_mont, in
# $code (Perl variables below are interpolated into the here-document
# as register names and addressing expressions) and prints it to
# STDOUT.
#
# As used by the generated code, bn_mul_mont receives:
#   r0 = rp, r1 = ap, r2 = bp, r3 = np,
#   first stack word  = pointer to n0 (it is dereferenced once),
#   second stack word = num.
# It returns 0 in r0 without doing any work when num < 2, and 1 in r0
# otherwise.
# --------------------------------------------------------------------

# Register aliases. Note that $bp, $bi and $rp all name r2: the bp
# pointer is parked in its stack slot ($_bp) while r2 carries the
# current bp[i] word, and rp is only loaded into r2 at the very end.
$num="r0";	# starts as num argument, but holds &tp[num-1]
$ap="r1";
$bp="r2"; $bi="r2"; $rp="r2";
$np="r3";
$tp="r4";
$aj="r5";
$nj="r6";
$tj="r7";
$n0="r8";
###########	# r9 is reserved by ELF as platform specific, e.g. TLS pointer
$alo="r10";	# sl, gcc uses it to keep @GOT
$ahi="r11";	# fp
$nlo="r12";	# ip
###########	# r13 is stack pointer
$nhi="r14";	# lr
###########	# r15 is program counter

#### argument block layout relative to &tp[num-1], a.k.a. $num
#
# Above &tp[num-1] the stack holds tp[num] (1 word), the ten registers
# saved by "stmdb sp!,{r4-r12,lr}" (10 words), then the {r0,r2} pair
# pushed on entry, then the caller's stack arguments. Hence rp is 12
# words up, bp 13, the n0 slot 14 and the num slot 15.
$_rp="$num,#12*4";
# ap permanently resides in r1
$_bp="$num,#13*4";
# np permanently resides in r3
$_n0="$num,#14*4";
# The num slot is recycled as the outer-loop end marker. The value
# stored there is the one computed by "add $tp,$bp,$num" after $num was
# reduced by 4, i.e. &bp[num-1], even though the emitted asm comment
# labels it "&bp[num]".
$_num="$num,#15*4";	$_bpend=$_num;

$code=<<___;
.text

.global	bn_mul_mont
.type	bn_mul_mont,%function

.align	2
bn_mul_mont:
	stmdb	sp!,{r0,r2}		@ sp points at argument block
	ldr	$num,[sp,#3*4]		@ load num
	cmp	$num,#2
	movlt	r0,#0
	addlt	sp,sp,#2*4
	blt	.Labrt

	stmdb	sp!,{r4-r12,lr}		@ save 10 registers

	mov	$num,$num,lsl#2		@ rescale $num for byte count
	sub	sp,sp,$num		@ alloca(4*num)
	sub	sp,sp,#4		@ +extra dword
	sub	$num,$num,#4		@ "num=num-1"
	add	$tp,$bp,$num		@ &bp[num-1]

	add	$num,sp,$num		@ $num to point at &tp[num-1]
	ldr	$n0,[$_n0]		@ &n0
	ldr	$bi,[$bp]		@ bp[0]
	ldr	$aj,[$ap],#4		@ ap[0],ap++
	ldr	$nj,[$np],#4		@ np[0],np++
	ldr	$n0,[$n0]		@ *n0
	str	$tp,[$_bpend]		@ save &bp[num]

	umull	$alo,$ahi,$aj,$bi	@ ap[0]*bp[0]
	str	$n0,[$_n0]		@ save n0 value
	mul	$n0,$alo,$n0		@ "tp[0]"*n0
	mov	$nlo,#0
	umlal	$alo,$nlo,$nj,$n0	@ np[0]*n0+"t[0]"
	mov	$tp,sp

.L1st:
	ldr	$aj,[$ap],#4		@ ap[j],ap++
	mov	$alo,$ahi
	mov	$ahi,#0
	umlal	$alo,$ahi,$aj,$bi	@ ap[j]*bp[0]
	ldr	$nj,[$np],#4		@ np[j],np++
	mov	$nhi,#0
	umlal	$nlo,$nhi,$nj,$n0	@ np[j]*n0
	adds	$nlo,$nlo,$alo
	str	$nlo,[$tp],#4		@ tp[j-1]=,tp++
	adc	$nlo,$nhi,#0
	cmp	$tp,$num
	bne	.L1st

	adds	$nlo,$nlo,$ahi
	mov	$nhi,#0
	adc	$nhi,$nhi,#0
	ldr	$tp,[$_bp]		@ restore bp
	str	$nlo,[$num]		@ tp[num-1]=
	ldr	$n0,[$_n0]		@ restore n0
	str	$nhi,[$num,#4]		@ tp[num]=

.Louter:
	sub	$tj,$num,sp		@ "original" $num-1 value
	sub	$ap,$ap,$tj		@ "rewind" ap to &ap[1]
	sub	$np,$np,$tj		@ "rewind" np to &np[1]
	ldr	$bi,[$tp,#4]!		@ *(++bp)
	ldr	$aj,[$ap,#-4]		@ ap[0]
	ldr	$nj,[$np,#-4]		@ np[0]
	ldr	$alo,[sp]		@ tp[0]
	ldr	$tj,[sp,#4]		@ tp[1]

	mov	$ahi,#0
	umlal	$alo,$ahi,$aj,$bi	@ ap[0]*bp[i]+tp[0]
	str	$tp,[$_bp]		@ save bp
	mul	$n0,$alo,$n0
	mov	$nlo,#0
	umlal	$alo,$nlo,$nj,$n0	@ np[0]*n0+"tp[0]"
	mov	$tp,sp

.Linner:
	ldr	$aj,[$ap],#4		@ ap[j],ap++
	adds	$alo,$ahi,$tj		@ +=tp[j]
	mov	$ahi,#0
	umlal	$alo,$ahi,$aj,$bi	@ ap[j]*bp[i]
	ldr	$nj,[$np],#4		@ np[j],np++
	mov	$nhi,#0
	umlal	$nlo,$nhi,$nj,$n0	@ np[j]*n0
	ldr	$tj,[$tp,#8]		@ tp[j+1]
	adc	$ahi,$ahi,#0
	adds	$nlo,$nlo,$alo
	str	$nlo,[$tp],#4		@ tp[j-1]=,tp++
	adc	$nlo,$nhi,#0
	cmp	$tp,$num
	bne	.Linner

	adds	$nlo,$nlo,$ahi
	mov	$nhi,#0
	adc	$nhi,$nhi,#0
	adds	$nlo,$nlo,$tj
	adc	$nhi,$nhi,#0
	ldr	$tp,[$_bp]		@ restore bp
	ldr	$tj,[$_bpend]		@ restore &bp[num]
	str	$nlo,[$num]		@ tp[num-1]=
	ldr	$n0,[$_n0]		@ restore n0
	str	$nhi,[$num,#4]		@ tp[num]=

	cmp	$tp,$tj
	bne	.Louter

	ldr	$rp,[$_rp]		@ pull rp
	add	$num,$num,#4		@ $num to point at &tp[num]
	sub	$aj,$num,sp		@ "original" num value
	mov	$tp,sp			@ "rewind" $tp
	mov	$ap,$tp			@ "borrow" $ap
	sub	$np,$np,$aj		@ "rewind" $np to &np[0]

	subs	$tj,$tj,$tj		@ "clear" carry flag
.Lsub:	ldr	$tj,[$tp],#4
	ldr	$nj,[$np],#4
	sbcs	$tj,$tj,$nj		@ tp[j]-np[j]
	str	$tj,[$rp],#4		@ rp[j]=
	teq	$tp,$num		@ preserve carry
	bne	.Lsub
	sbcs	$nhi,$nhi,#0		@ upmost carry
	mov	$tp,sp			@ "rewind" $tp
	sub	$rp,$rp,$aj		@ "rewind" $rp

	and	$ap,$tp,$nhi
	bic	$np,$rp,$nhi
	orr	$ap,$ap,$np		@ ap=borrow?tp:rp

.Lcopy:	ldr	$tj,[$ap],#4		@ copy or in-place refresh
	str	sp,[$tp],#4		@ zap tp
	str	$tj,[$rp],#4
	cmp	$tp,$num
	bne	.Lcopy

	add	sp,$num,#4		@ skip over tp[num+1]
	ldmia	sp!,{r4-r12,lr}		@ restore registers
	add	sp,sp,#2*4		@ skip over {r0,r2}
	mov	r0,#1
.Labrt:	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
.size	bn_mul_mont,.-bn_mul_mont
.asciz	"Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
___

# Replace the "bx lr" mnemonic with a raw .word so the output still
# assembles when the assembler is restricted to -march=armv4.
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
print $code;
# Closing STDOUT flushes buffered output; fail loudly if that flush
# fails so the build never consumes a silently truncated assembly file.
close STDOUT or die "error closing STDOUT: $!";

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -