⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mips3.s

📁 mediastreamer2是开源的网络传输媒体流的库
💻 S
📖 第 1 页 / 共 3 页
字号:
.rdata
.asciiz	"mips3.s, Version 1.1"
.asciiz	"MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"

/*
 * ====================================================================
 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
 * project.
 *
 * Rights for redistribution and usage in source and binary forms are
 * granted according to the OpenSSL license. Warranty of any kind is
 * disclaimed.
 * ====================================================================
 */

/*
 * This is my modest contribution to the OpenSSL project (see
 * http://www.openssl.org/ for more information about it) and is
 * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
 *
 * The module is designed to work with either of the "new" MIPS ABI(5),
 * namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
 * IRIX 5.x not only because it doesn't support new ABIs but also
 * because 5.x kernels put R4x00 CPU into 32-bit mode and all those
 * 64-bit instructions (daddu, dmultu, etc.) found below gonna only
 * cause illegal instruction exception:-(
 *
 * In addition the code depends on preprocessor flags set up by MIPSpro
 * compiler driver (either as or cc) and therefore (probably?) can't be
 * compiled by the GNU assembler. GNU C driver manages fine though...
 * I mean as long as -mmips-as is specified or is the default option,
 * because then it simply invokes /usr/bin/as which in turn takes
 * perfect care of the preprocessor definitions. Another neat feature
 * offered by the MIPSpro assembler is an optimization pass. This gave
 * me the opportunity to have the code looking more regular as all those
 * architecture dependent instruction rescheduling details were left to
 * the assembler. Cool, huh?
 *
 * Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
 * goes way over 3 times faster!
 *
 *					<appro@fy.chalmers.se>
 */

/*
 * Reading notes for the code below:
 *
 * - Register names (a0-a3, t0-t3, ta0-ta3, t8, t9, v0, v1, AT, ra, zero)
 *   and the LEAF/END/PTR_ADD macros are not defined in this file; they
 *   are expected to come from the two headers included next.
 * - The file runs in ".set reorder" mode by default, so the assembler is
 *   free to schedule instructions and fill branch delay slots. Wherever
 *   the exact slot contents matter, the code switches to
 *   ".set noreorder"; there the instruction that follows a branch IS
 *   its delay slot.
 * - Branch-likely instructions (bgtzl, bnezl, beqzl) execute their delay
 *   slot only when the branch is taken. The code relies on this both to
 *   preload the next source word and to build conditional one-liners
 *   with a ".+8" target.
 * - ".set noat" is in effect: AT is used as an ordinary scratch register.
 */
#include <asm.h>
#include <regdef.h>

/*
 * MOVNZ(cond,dst,src): dst = src if cond is non-zero, otherwise dst is
 * left alone. On ISA level 4 and up this is the movn instruction. On
 * older ISAs it is emulated by a branch-likely to ".+8" (the instruction
 * right after the delay slot), so the move in the delay slot runs only
 * when cond != 0. The emulation leaves the assembler in "reorder" mode.
 */
#if _MIPS_ISA>=4
#define	MOVNZ(cond,dst,src)	\
	movn	dst,src,cond
#else
#define	MOVNZ(cond,dst,src)	\
	.set	noreorder;	\
	bnezl	cond,.+8;	\
	move	dst,src;	\
	.set	reorder
#endif

.text

.set	noat
.set	reorder

/* v1 holds the constant -4, used to mask a word count down to a
 * multiple of four (count & -4) in the unrolled loops below. */
#define	MINUS4	v1

/*
 * bn_mul_add_words
 *
 * Register use, as read from the code:
 *   a0  pointer to 64-bit words that are read, updated and written back
 *   a1  pointer to 64-bit source words (read only)
 *   a2  word count; a count <= 0 returns 0 without touching memory
 *   a3  64-bit multiplier
 *   v0  running carry word; its final value is the return value
 *
 * For every word: the 128-bit value a1[i]*a3 + a0[i] + carry is formed,
 * its low 64 bits are stored to a0[i] and its high 64 bits become the
 * next carry. The main loop handles four words per pass, the tail
 * handles the remaining one to three.
 *
 * NOTE(review): presumably the counterpart of bn_mul_add_words() in
 * crypto/bn/bn_asm.c -- confirm the C prototype there.
 */
.align	5
LEAF(bn_mul_add_words)
	.set	noreorder
	bgtzl	a2,.L_bn_mul_add_words_proceed
	ld	t0,0(a1)		/* delay slot: preload a1[0], only if count > 0 */
	jr	ra
	move	v0,zero			/* delay slot: return 0 */
	.set	reorder

.L_bn_mul_add_words_proceed:
	li	MINUS4,-4
	and	ta0,a2,MINUS4		/* ta0 != 0 iff at least 4 words to do */
	move	v0,zero			/* carry = 0 */
	beqz	ta0,.L_bn_mul_add_words_tail

.L_bn_mul_add_words_loop:
	/* word 0; t0 was preloaded with a1[0] */
	dmultu	t0,a3
	ld	t1,0(a0)
	ld	t2,8(a1)
	ld	t3,8(a0)
	ld	ta0,16(a1)
	ld	ta1,16(a0)
	daddu	t1,v0			/* a0[0] + carry-in */
	sltu	v0,t1,v0	/* All manuals say it "compares 32-bit
				 * values", but it seems to work fine
				 * even on 64-bit registers. */
	mflo	AT
	mfhi	t0
	daddu	t1,AT			/* + low half of the product */
	daddu	v0,t0			/* carry = high half + first carry bit */
	sltu	AT,t1,AT		/* carry bit out of the second add */
	sd	t1,0(a0)
	daddu	v0,AT

	/* word 1 */
	dmultu	t2,a3
	ld	ta2,24(a1)
	ld	ta3,24(a0)
	daddu	t3,v0
	sltu	v0,t3,v0
	mflo	AT
	mfhi	t2
	daddu	t3,AT
	daddu	v0,t2
	sltu	AT,t3,AT
	sd	t3,8(a0)
	daddu	v0,AT

	/* word 2; pointers advance here, so later stores use negative
	 * offsets from the already-bumped a0 */
	dmultu	ta0,a3
	subu	a2,4
	PTR_ADD	a0,32
	PTR_ADD	a1,32
	daddu	ta1,v0
	sltu	v0,ta1,v0
	mflo	AT
	mfhi	ta0
	daddu	ta1,AT
	daddu	v0,ta0
	sltu	AT,ta1,AT
	sd	ta1,-16(a0)
	daddu	v0,AT

	/* word 3 */
	dmultu	ta2,a3
	and	ta0,a2,MINUS4		/* another full group of 4 left? */
	daddu	ta3,v0
	sltu	v0,ta3,v0
	mflo	AT
	mfhi	ta2
	daddu	ta3,AT
	daddu	v0,ta2
	sltu	AT,ta3,AT
	sd	ta3,-8(a0)
	daddu	v0,AT
	.set	noreorder
	bgtzl	ta0,.L_bn_mul_add_words_loop
	ld	t0,0(a1)		/* delay slot: preload, only if looping */

	bnezl	a2,.L_bn_mul_add_words_tail
	ld	t0,0(a1)		/* delay slot: preload, only if words remain */
	.set	reorder

.L_bn_mul_add_words_return:
	jr	ra

	/* Tail: up to three remaining words, t0 already holds a1[0]. */
.L_bn_mul_add_words_tail:
	dmultu	t0,a3
	ld	t1,0(a0)
	subu	a2,1
	daddu	t1,v0
	sltu	v0,t1,v0
	mflo	AT
	mfhi	t0
	daddu	t1,AT
	daddu	v0,t0
	sltu	AT,t1,AT
	sd	t1,0(a0)
	daddu	v0,AT
	beqz	a2,.L_bn_mul_add_words_return

	ld	t0,8(a1)
	dmultu	t0,a3
	ld	t1,8(a0)
	subu	a2,1
	daddu	t1,v0
	sltu	v0,t1,v0
	mflo	AT
	mfhi	t0
	daddu	t1,AT
	daddu	v0,t0
	sltu	AT,t1,AT
	sd	t1,8(a0)
	daddu	v0,AT
	beqz	a2,.L_bn_mul_add_words_return

	/* third tail word: no count update, it is the last one handled */
	ld	t0,16(a1)
	dmultu	t0,a3
	ld	t1,16(a0)
	daddu	t1,v0
	sltu	v0,t1,v0
	mflo	AT
	mfhi	t0
	daddu	t1,AT
	daddu	v0,t0
	sltu	AT,t1,AT
	sd	t1,16(a0)
	daddu	v0,AT
	jr	ra
END(bn_mul_add_words)

/*
 * bn_mul_words
 *
 * Register use, as read from the code:
 *   a0  pointer to 64-bit destination words (written only)
 *   a1  pointer to 64-bit source words
 *   a2  word count; a count <= 0 returns 0 without touching memory
 *   a3  64-bit multiplier
 *   v0  running carry word; its final value is the return value
 *
 * For every word: low 64 bits of a1[i]*a3 + carry are stored to a0[i],
 * the high half of the product plus the carry bit of that addition
 * becomes the next carry. Same 4-way loop plus 1..3 word tail layout as
 * bn_mul_add_words.
 *
 * NOTE(review): presumably the counterpart of bn_mul_words() in
 * crypto/bn/bn_asm.c -- confirm the C prototype there.
 */
.align	5
LEAF(bn_mul_words)
	.set	noreorder
	bgtzl	a2,.L_bn_mul_words_proceed
	ld	t0,0(a1)		/* delay slot: preload a1[0], only if count > 0 */
	jr	ra
	move	v0,zero			/* delay slot: return 0 */
	.set	reorder

.L_bn_mul_words_proceed:
	li	MINUS4,-4
	and	ta0,a2,MINUS4		/* ta0 != 0 iff at least 4 words to do */
	move	v0,zero			/* carry = 0 */
	beqz	ta0,.L_bn_mul_words_tail

.L_bn_mul_words_loop:
	/* word 0 */
	dmultu	t0,a3
	ld	t2,8(a1)
	ld	ta0,16(a1)
	ld	ta2,24(a1)
	mflo	AT
	mfhi	t0
	daddu	v0,AT			/* carry-in + low half */
	sltu	t1,v0,AT		/* carry bit of that add */
	sd	v0,0(a0)
	daddu	v0,t1,t0		/* new carry = carry bit + high half */

	/* word 1; pointers advance here */
	dmultu	t2,a3
	subu	a2,4
	PTR_ADD	a0,32
	PTR_ADD	a1,32
	mflo	AT
	mfhi	t2
	daddu	v0,AT
	sltu	t3,v0,AT
	sd	v0,-24(a0)
	daddu	v0,t3,t2

	/* word 2 */
	dmultu	ta0,a3
	mflo	AT
	mfhi	ta0
	daddu	v0,AT
	sltu	ta1,v0,AT
	sd	v0,-16(a0)
	daddu	v0,ta1,ta0

	/* word 3 */
	dmultu	ta2,a3
	and	ta0,a2,MINUS4		/* another full group of 4 left? */
	mflo	AT
	mfhi	ta2
	daddu	v0,AT
	sltu	ta3,v0,AT
	sd	v0,-8(a0)
	daddu	v0,ta3,ta2
	.set	noreorder
	bgtzl	ta0,.L_bn_mul_words_loop
	ld	t0,0(a1)		/* delay slot: preload, only if looping */

	bnezl	a2,.L_bn_mul_words_tail
	ld	t0,0(a1)		/* delay slot: preload, only if words remain */
	.set	reorder

.L_bn_mul_words_return:
	jr	ra

	/* Tail: up to three remaining words, t0 already holds a1[0]. */
.L_bn_mul_words_tail:
	dmultu	t0,a3
	subu	a2,1
	mflo	AT
	mfhi	t0
	daddu	v0,AT
	sltu	t1,v0,AT
	sd	v0,0(a0)
	daddu	v0,t1,t0
	beqz	a2,.L_bn_mul_words_return

	ld	t0,8(a1)
	dmultu	t0,a3
	subu	a2,1
	mflo	AT
	mfhi	t0
	daddu	v0,AT
	sltu	t1,v0,AT
	sd	v0,8(a0)
	daddu	v0,t1,t0
	beqz	a2,.L_bn_mul_words_return

	ld	t0,16(a1)
	dmultu	t0,a3
	mflo	AT
	mfhi	t0
	daddu	v0,AT
	sltu	t1,v0,AT
	sd	v0,16(a0)
	daddu	v0,t1,t0
	jr	ra
END(bn_mul_words)

/*
 * bn_sqr_words
 *
 * Register use, as read from the code:
 *   a0  pointer to 64-bit destination words; two are written per input
 *       word (low half of the square first, then the high half)
 *   a1  pointer to 64-bit source words
 *   a2  word count; a count <= 0 returns without touching memory
 *   v0  set to zero on the early-exit path and at
 *       .L_bn_sqr_words_proceed and not written afterwards
 *
 * The main loop squares four words per pass (a0 advances by 64 bytes,
 * a1 by 32), the tail handles the remaining one to three.
 *
 * NOTE(review): presumably the counterpart of bn_sqr_words() in
 * crypto/bn/bn_asm.c -- confirm the C prototype there.
 */
.align	5
LEAF(bn_sqr_words)
	.set	noreorder
	bgtzl	a2,.L_bn_sqr_words_proceed
	ld	t0,0(a1)		/* delay slot: preload a1[0], only if count > 0 */
	jr	ra
	move	v0,zero			/* delay slot */
	.set	reorder

.L_bn_sqr_words_proceed:
	li	MINUS4,-4
	and	ta0,a2,MINUS4		/* ta0 != 0 iff at least 4 words to do */
	move	v0,zero
	beqz	ta0,.L_bn_sqr_words_tail

.L_bn_sqr_words_loop:
	/* word 0 */
	dmultu	t0,t0
	ld	t2,8(a1)
	ld	ta0,16(a1)
	ld	ta2,24(a1)
	mflo	t1
	mfhi	t0
	sd	t1,0(a0)
	sd	t0,8(a0)

	/* word 1; pointers advance here */
	dmultu	t2,t2
	subu	a2,4
	PTR_ADD	a0,64
	PTR_ADD	a1,32
	mflo	t3
	mfhi	t2
	sd	t3,-48(a0)
	sd	t2,-40(a0)

	/* word 2 */
	dmultu	ta0,ta0
	mflo	ta1
	mfhi	ta0
	sd	ta1,-32(a0)
	sd	ta0,-24(a0)

	/* word 3 */
	dmultu	ta2,ta2
	and	ta0,a2,MINUS4		/* another full group of 4 left? */
	mflo	ta3
	mfhi	ta2
	sd	ta3,-16(a0)
	sd	ta2,-8(a0)

	.set	noreorder
	bgtzl	ta0,.L_bn_sqr_words_loop
	ld	t0,0(a1)		/* delay slot: preload, only if looping */

	bnezl	a2,.L_bn_sqr_words_tail
	ld	t0,0(a1)		/* delay slot: preload, only if words remain */
	.set	reorder

.L_bn_sqr_words_return:
	move	v0,zero
	jr	ra

	/* Tail: up to three remaining words, t0 already holds a1[0]. */
.L_bn_sqr_words_tail:
	dmultu	t0,t0
	subu	a2,1
	mflo	t1
	mfhi	t0
	sd	t1,0(a0)
	sd	t0,8(a0)
	beqz	a2,.L_bn_sqr_words_return

	ld	t0,8(a1)
	dmultu	t0,t0
	subu	a2,1
	mflo	t1
	mfhi	t0
	sd	t1,16(a0)
	sd	t0,24(a0)
	beqz	a2,.L_bn_sqr_words_return

	ld	t0,16(a1)
	dmultu	t0,t0
	mflo	t1
	mfhi	t0
	sd	t1,32(a0)
	sd	t0,40(a0)
	jr	ra
END(bn_sqr_words)

/*
 * bn_add_words
 *
 * Register use, as read from the code:
 *   a0  pointer to 64-bit result words
 *   a1  pointer to the first 64-bit operand array
 *   a2  pointer to the second 64-bit operand array
 *   a3  word count; a count <= 0 returns 0 without touching memory
 *   v0  running carry; its final value is the return value
 *
 * For every word: a0[i] = a1[i] + a2[i] + carry. The carry out is the
 * sum of the two carry bits produced by the two additions (t8/t9 for
 * operand + operand, v0 for adding the incoming carry).
 * AT is the "full group of 4 left" flag here, t8/t9 are scratch.
 *
 * NOTE(review): presumably the counterpart of bn_add_words() in
 * crypto/bn/bn_asm.c -- confirm the C prototype there.
 */
.align	5
LEAF(bn_add_words)
	.set	noreorder
	bgtzl	a3,.L_bn_add_words_proceed
	ld	t0,0(a1)		/* delay slot: preload a1[0], only if count > 0 */
	jr	ra
	move	v0,zero			/* delay slot: return 0 */
	.set	reorder

.L_bn_add_words_proceed:
	li	MINUS4,-4
	and	AT,a3,MINUS4		/* AT != 0 iff at least 4 words to do */
	move	v0,zero			/* carry = 0 */
	beqz	AT,.L_bn_add_words_tail

.L_bn_add_words_loop:
	/* load all eight operands and bump the three pointers */
	ld	ta0,0(a2)
	subu	a3,4
	ld	t1,8(a1)
	and	AT,a3,MINUS4		/* another full group of 4 left? */
	ld	t2,16(a1)
	PTR_ADD	a2,32
	ld	t3,24(a1)
	PTR_ADD	a0,32
	ld	ta1,-24(a2)
	PTR_ADD	a1,32
	ld	ta2,-16(a2)
	ld	ta3,-8(a2)

	/* word 0 */
	daddu	ta0,t0			/* a2[0] + a1[0] */
	sltu	t8,ta0,t0		/* carry bit of that add */
	daddu	t0,ta0,v0		/* + carry-in */
	sltu	v0,t0,ta0		/* carry bit of the second add */
	sd	t0,-32(a0)
	daddu	v0,t8

	/* word 1 */
	daddu	ta1,t1
	sltu	t9,ta1,t1
	daddu	t1,ta1,v0
	sltu	v0,t1,ta1
	sd	t1,-24(a0)
	daddu	v0,t9

	/* word 2 */
	daddu	ta2,t2
	sltu	t8,ta2,t2
	daddu	t2,ta2,v0
	sltu	v0,t2,ta2
	sd	t2,-16(a0)
	daddu	v0,t8

	/* word 3 */
	daddu	ta3,t3
	sltu	t9,ta3,t3
	daddu	t3,ta3,v0
	sltu	v0,t3,ta3
	sd	t3,-8(a0)
	daddu	v0,t9

	.set	noreorder
	bgtzl	AT,.L_bn_add_words_loop
	ld	t0,0(a1)		/* delay slot: preload, only if looping */

	bnezl	a3,.L_bn_add_words_tail
	ld	t0,0(a1)		/* delay slot: preload, only if words remain */
	.set	reorder

.L_bn_add_words_return:
	jr	ra

	/* Tail: up to three remaining words, t0 already holds a1[0]. */
.L_bn_add_words_tail:
	ld	ta0,0(a2)
	daddu	ta0,t0
	subu	a3,1
	sltu	t8,ta0,t0
	daddu	t0,ta0,v0
	sltu	v0,t0,ta0
	sd	t0,0(a0)
	daddu	v0,t8
	beqz	a3,.L_bn_add_words_return

	ld	t1,8(a1)
	ld	ta1,8(a2)
	daddu	ta1,t1
	subu	a3,1
	sltu	t9,ta1,t1
	daddu	t1,ta1,v0
	sltu	v0,t1,ta1
	sd	t1,8(a0)
	daddu	v0,t9
	beqz	a3,.L_bn_add_words_return

	ld	t2,16(a1)
	ld	ta2,16(a2)
	daddu	ta2,t2
	sltu	t8,ta2,t2
	daddu	t2,ta2,v0
	sltu	v0,t2,ta2
	sd	t2,16(a0)
	daddu	v0,t8
	jr	ra
END(bn_add_words)

/*
 * bn_sub_words
 *
 * Register use, as read from the code:
 *   a0  pointer to 64-bit result words
 *   a1  pointer to the 64-bit minuend array
 *   a2  pointer to the 64-bit subtrahend array
 *   a3  word count; a count <= 0 returns 0 without touching memory
 *   v0  running borrow; its final value is the return value
 *
 * For every word: a0[i] = a1[i] - a2[i] - borrow.
 * Borrow update per word: t8/t9 = (a1[i] < a2[i]). If the raw
 * difference a1[i]-a2[i] is non-zero, the new borrow is that flag
 * (MOVNZ); if the difference is zero the previous borrow is kept,
 * because 0 - borrow underflows exactly when borrow was set.
 *
 * NOTE(review): presumably the counterpart of bn_sub_words() in
 * crypto/bn/bn_asm.c -- confirm the C prototype there.
 */
.align	5
LEAF(bn_sub_words)
	.set	noreorder
	bgtzl	a3,.L_bn_sub_words_proceed
	ld	t0,0(a1)		/* delay slot: preload a1[0], only if count > 0 */
	jr	ra
	move	v0,zero			/* delay slot: return 0 */
	.set	reorder

.L_bn_sub_words_proceed:
	li	MINUS4,-4
	and	AT,a3,MINUS4		/* AT != 0 iff at least 4 words to do */
	move	v0,zero			/* borrow = 0 */
	beqz	AT,.L_bn_sub_words_tail

.L_bn_sub_words_loop:
	/* load all eight operands and bump the three pointers */
	ld	ta0,0(a2)
	subu	a3,4
	ld	t1,8(a1)
	and	AT,a3,MINUS4		/* another full group of 4 left? */
	ld	t2,16(a1)
	PTR_ADD	a2,32
	ld	t3,24(a1)
	PTR_ADD	a0,32
	ld	ta1,-24(a2)
	PTR_ADD	a1,32
	ld	ta2,-16(a2)
	ld	ta3,-8(a2)

	/* word 0 */
	sltu	t8,t0,ta0		/* would a1[0]-a2[0] borrow? */
	dsubu	t0,ta0			/* raw difference */
	dsubu	ta0,t0,v0		/* minus borrow-in */
	sd	ta0,-32(a0)
	MOVNZ	(t0,v0,t8)		/* borrow = t8 unless raw difference is 0 */

	/* word 1 */
	sltu	t9,t1,ta1
	dsubu	t1,ta1
	dsubu	ta1,t1,v0
	sd	ta1,-24(a0)
	MOVNZ	(t1,v0,t9)

	/* word 2 */
	sltu	t8,t2,ta2
	dsubu	t2,ta2
	dsubu	ta2,t2,v0
	sd	ta2,-16(a0)
	MOVNZ	(t2,v0,t8)

	/* word 3 */
	sltu	t9,t3,ta3
	dsubu	t3,ta3
	dsubu	ta3,t3,v0
	sd	ta3,-8(a0)
	MOVNZ	(t3,v0,t9)

	.set	noreorder
	bgtzl	AT,.L_bn_sub_words_loop
	ld	t0,0(a1)		/* delay slot: preload, only if looping */

	bnezl	a3,.L_bn_sub_words_tail
	ld	t0,0(a1)		/* delay slot: preload, only if words remain */
	.set	reorder

.L_bn_sub_words_return:
	jr	ra

	/* Tail: up to three remaining words, t0 already holds a1[0]. */
.L_bn_sub_words_tail:
	ld	ta0,0(a2)
	subu	a3,1
	sltu	t8,t0,ta0
	dsubu	t0,ta0
	dsubu	ta0,t0,v0
	MOVNZ	(t0,v0,t8)
	sd	ta0,0(a0)
	beqz	a3,.L_bn_sub_words_return

	ld	t1,8(a1)
	subu	a3,1
	ld	ta1,8(a2)
	sltu	t9,t1,ta1
	dsubu	t1,ta1
	dsubu	ta1,t1,v0
	MOVNZ	(t1,v0,t9)
	sd	ta1,8(a0)
	beqz	a3,.L_bn_sub_words_return

	ld	t2,16(a1)
	ld	ta2,16(a2)
	sltu	t8,t2,ta2
	dsubu	t2,ta2
	dsubu	ta2,t2,v0
	MOVNZ	(t2,v0,t8)
	sd	ta2,16(a0)
	jr	ra
END(bn_sub_words)

#undef	MINUS4

/*
 * bn_div_3_words
 *
 * Register use, as read from the code:
 *   a0  pointer to a 64-bit word; the words at 0(a0), -8(a0) and
 *       -16(a0) are read
 *   a1  64-bit value, kept in ta2 and multiplied by the quotient
 *   a2  64-bit value, passed through to bn_div_words as the divisor
 *   v0  return value
 *
 * If the word at 0(a0) equals a2 the routine returns -1 (all ones).
 * Otherwise it calls bn_div_words with a0 = word at 0, a1 = word at -8
 * and the unchanged a2, then runs a correction loop that compares the
 * 128-bit product ta2*v0 (t1:t0) against v1:t2 (v1 as left by
 * bn_div_words, t2 = word at -16) and steps v0 down by one per extra
 * iteration.
 *
 * NOTE(review): presumably a two-word-divisor quotient estimate as used
 * by BN_div -- confirm against the C caller.
 */
.align 5
LEAF(bn_div_3_words)
	.set	reorder
	move	a3,a0		/* we know that bn_div_words doesn't
				 * touch a3, ta2, ta3 and preserves a2
				 * so that we can save two arguments
				 * and return address in registers
				 * instead of stack:-)
				 */
	ld	a0,(a3)
	move	ta2,a1
	ld	a1,-8(a3)
	bne	a0,a2,.L_bn_div_3_words_proceed
	li	v0,-1
	jr	ra
.L_bn_div_3_words_proceed:
	move	ta3,ra			/* keep our return address across the call */
	bal	bn_div_words
	move	ra,ta3
	dmultu	ta2,v0			/* t1:t0 = saved a1 * quotient */
	ld	t2,-16(a3)
	move	ta0,zero
	mfhi	t1
	mflo	t0
	sltu	t8,t1,v1		/* high product word already below v1? */
.L_bn_div_3_words_inner_loop:
	bnez	t8,.L_bn_div_3_words_inner_loop_done
	sgeu	AT,t2,t0
	seq	t9,t1,v1
	and	AT,t9			/* AT = (t1 == v1) && (t2 >= t0): stop condition */
	sltu	t3,t0,ta2		/* borrow out of the low word of t1:t0 - ta2 */
	daddu	v1,a2
	dsubu	t1,t3
	dsubu	t0,ta2
	sltu	t8,t1,v1
	sltu	ta0,v1,a2		/* v1 += a2 wrapped around */
	or	t8,ta0
	.set	noreorder
	beqzl	AT,.L_bn_div_3_words_inner_loop
	dsubu	v0,1			/* delay slot: step quotient down, only if looping */
	.set	reorder
.L_bn_div_3_words_inner_loop_done:
	jr	ra
END(bn_div_3_words)

/*
 * bn_div_words
 *
 * Register use, as read from the code:
 *   a0  high 64 bits of the dividend
 *   a1  low 64 bits of the dividend
 *   a2  64-bit divisor; shifted during the routine and shifted back
 *       before returning
 *   v0  quotient (return value); -1 if a2 is zero
 *   v1  remainder on return
 *   t9  normalisation shift count
 *
 * Outline:
 *   1. Shift a2 left until its top bit is set, counting shifts in t9.
 *   2. If any of the top t9 bits of a0 are set, execute "break 6";
 *      otherwise shift the 128-bit dividend a0:a1 left by t9.
 *   3. Produce the quotient in two 32-bit halves. Each half starts from
 *      an estimate QT = a0 / (a2 >> 32), or 0xffffffff when the upper
 *      32 bits of a0 equal those of a2, followed by a loop that steps
 *      QT down until a2*QT no longer exceeds the current partial
 *      dividend.
 *   4. v0 = (first half << 32) | second half, v1 = remainder >> t9.
 *
 * bn_div_3_words in this file relies on this routine not touching a3,
 * ta2 and ta3.
 *
 * NOTE(review): presumably the counterpart of bn_div_words() in
 * crypto/bn/bn_asm.c -- confirm the C prototype there.
 */
.align	5
LEAF(bn_div_words)
	.set	noreorder
	bnezl	a2,.L_bn_div_words_proceed
	move	v1,zero			/* delay slot: only if a2 != 0 */
	jr	ra
	li	v0,-1		/* I'd rather signal div-by-zero
				 * which can be done with 'break 7' */

.L_bn_div_words_proceed:
	bltz	a2,.L_bn_div_words_body	/* top bit already set: no shift needed */
	move	t9,v1			/* delay slot: t9 = 0 */
	dsll	a2,1
	bgtz	a2,.-4			/* back to the dsll while top bit is clear */
	addu	t9,1			/* delay slot: count every shift */

	.set	reorder
	negu	t1,t9
	li	t2,-1
	dsll	t2,t1
	and	t2,a0			/* bits of a0 that the shift would lose */
	dsrl	AT,a1,t1		/* bits of a1 that move up into a0 */
	.set	noreorder
	bnezl	t2,.+8
	break	6		/* signal overflow */
	.set	reorder
	dsll	a0,t9
	dsll	a1,t9
	or	a0,AT

#define	QT	ta0
#define	HH	ta1
#define	DH	v1
.L_bn_div_words_body:
	dsrl	DH,a2,32		/* DH = upper 32 bits of the divisor */
	sgeu	AT,a0,a2
	.set	noreorder
	bnezl	AT,.+8
	dsubu	a0,a2			/* delay slot: a0 -= a2, only if a0 >= a2 */
	.set	reorder

	/* first quotient half */
	li	QT,-1
	dsrl	HH,a0,32
	dsrl	QT,32	/* q=0xffffffff */
	beq	DH,HH,.L_bn_div_words_skip_div1
	ddivu	zero,a0,DH
	mflo	QT
.L_bn_div_words_skip_div1:
	dmultu	a2,QT
	dsll	t3,a0,32
	dsrl	AT,a1,32
	or	t3,AT			/* t3 = (a0 << 32) | upper 32 bits of a1 */
	mflo	t0
	mfhi	t1			/* t1:t0 = a2 * QT */
.L_bn_div_words_inner_loop1:
	sltu	t2,t3,t0
	seq	t8,HH,t1
	sltu	AT,HH,t1
	and	t2,t8
	sltu	v0,t0,a2		/* borrow out of t0 - a2 */
	or	AT,t2			/* AT = (t1:t0 > HH:t3) */
	.set	noreorder
	beqz	AT,.L_bn_div_words_inner_loop1_done
	dsubu	t1,v0			/* delay slot (runs either way) */
	dsubu	t0,a2
	b	.L_bn_div_words_inner_loop1
	dsubu	QT,1			/* delay slot: step the estimate down */
	.set	reorder
.L_bn_div_words_inner_loop1_done:

	/* second quotient half */
	dsll	a1,32
	dsubu	a0,t3,t0		/* partial remainder */
	dsll	v0,QT,32		/* park the first half in the upper 32 bits */
	li	QT,-1
	dsrl	HH,a0,32
	dsrl	QT,32	/* q=0xffffffff */
	beq	DH,HH,.L_bn_div_words_skip_div2
	ddivu	zero,a0,DH
	mflo	QT
.L_bn_div_words_skip_div2:
#undef	DH
	dmultu	a2,QT
	dsll	t3,a0,32
	dsrl	AT,a1,32
	or	t3,AT
	mflo	t0
	mfhi	t1
.L_bn_div_words_inner_loop2:
	sltu	t2,t3,t0
	seq	t8,HH,t1
	sltu	AT,HH,t1
	and	t2,t8
	sltu	v1,t0,a2		/* v1 is plain scratch from here on */
	or	AT,t2
	.set	noreorder
	beqz	AT,.L_bn_div_words_inner_loop2_done
	dsubu	t1,v1			/* delay slot (runs either way) */
	dsubu	t0,a2
	b	.L_bn_div_words_inner_loop2
	dsubu	QT,1			/* delay slot: step the estimate down */
	.set	reorder
.L_bn_div_words_inner_loop2_done:
#undef	HH
	dsubu	a0,t3,t0
	or	v0,QT			/* combine the two quotient halves */
	dsrl	v1,a0,t9	/* v1 contains remainder if anybody wants it */
	dsrl	a2,t9		/* restore a2 */
	jr	ra
#undef	QT
END(bn_div_words)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -