⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lib1funcs.asm

📁 gcc3.2.1源代码
💻 ASM
📖 第 1 页 / 共 4 页
字号:
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
/* NOTE(review): the conditional that selects this first variant opens
   before the visible part of the file; only its body is shown here.

   Unsigned 32-bit divide done in double-precision floating point.
   r1 is turned into 0x80000000 (1 rotated right by one).  XORing an
   operand with it flips the top bit, so the signed int-to-double
   conversion yields (operand - 2^31); adding the constant at L1
   (2147483648.0) removes that bias again.  A divisor of 0 or 1 takes
   the "trivial" path, which hands back the dividend unchanged.  */
	.global	GLOBAL(udivsi3_i4)
GLOBAL(udivsi3_i4):
	mov #1,r1
	cmp/hi r1,r5		/* T = (divisor > 1), unsigned */
	bf trivial
	rotr r1			/* r1 = 0x80000000 */
	xor r1,r4
	lds r4,fpul
	mova L1,r0
#ifdef FMOVD_WORKS
	fmov.d @r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
	fmov.s @r0+,fr5
	fmov.s @r0,fr4
#else
	fmov.s @r0+,fr4
	fmov.s @r0,fr5
#endif
#endif
	float fpul,dr0
	xor r1,r5
	lds r5,fpul
	float fpul,dr2
	fadd dr4,dr0
	fadd dr4,dr2
	fdiv dr2,dr0
	rts
	ftrc dr0,fpul		/* delay slot: truncate quotient into fpul */

trivial:
	rts
	lds r4,fpul		/* delay slot: result = dividend */

	.align 2
#ifdef FMOVD_WORKS
	.align 3	! make double below 8 byte aligned.
#endif
L1:
	.double 2147483648

#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
/* Same algorithm as the variant above, but the caller's fpscr is pushed
   on the stack, replaced by the word stored at L1 for the duration of
   the computation, and popped again in the delay slot of the return.
   NOTE(review): 0x80000 / 0x180000 look like the FPSCR PR and PR|SZ
   bits -- confirm against the SH-4 manual.  */

#if ! __SH5__ || __SH5__ == 32
#if __SH5__
	.mode	SHcompact
#endif
	.global	GLOBAL(udivsi3_i4)
GLOBAL(udivsi3_i4):
	mov #1,r1
	cmp/hi r1,r5		/* T = (divisor > 1), unsigned */
	bf trivial
	sts.l fpscr,@-r15	/* save caller fpscr */
	mova L1,r0
	lds.l @r0+,fpscr	/* r0 now points at the double after the word */
	rotr r1			/* r1 = 0x80000000 */
	xor r1,r4
	lds r4,fpul
#ifdef FMOVD_WORKS
	fmov.d @r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
	fmov.s @r0+,fr5
	fmov.s @r0,fr4
#else
	fmov.s @r0+,fr4
	fmov.s @r0,fr5
#endif
#endif
	float fpul,dr0
	xor r1,r5
	lds r5,fpul
	float fpul,dr2
	fadd dr4,dr0
	fadd dr4,dr2
	fdiv dr2,dr0
	ftrc dr0,fpul
	rts
	lds.l @r15+,fpscr	/* delay slot: restore caller fpscr */

#ifdef FMOVD_WORKS
	.align 3	! make double below 8 byte aligned.
#endif
trivial:
	rts
	lds r4,fpul		/* delay slot: result = dividend */

	.align 2
L1:
#ifndef FMOVD_WORKS
	.long 0x80000
#else
	.long 0x180000
#endif
	.double 2147483648

#endif /* ! __SH5__ || __SH5__ == 32 */
#endif /* ! __SH4__ */
#endif

#ifdef L_udivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh3e code.  */
#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!

!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
	.global	GLOBAL(udivsi3)

#if __SHMEDIA__
#if __SH5__ == 32
	.section	.text..SHmedia32,"ax"
#else
	.text
#endif
	.align	2

/* The assembly code that follows is a hand-optimized version of the C
   code that follows.  Note that the registers that are modified are
   exactly those listed as clobbered in the patterns udivsi3_i1 and
   udivsi3_i1_media.

unsigned
__udivsi3 (i, j)
    unsigned i, j;
{
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r18 asm ("r18") = 1;
  register unsigned long long r4 asm ("r4") = i;
  register unsigned long long r19 asm ("r19") = j;

  r19 <<= 31;
  r18 <<= 31;
  do
    if (r4 >= r19)
      r0 |= r18, r4 -= r19;
  while (r19 >>= 1, r18 >>= 1);

  return r0;
}
*/

GLOBAL(udivsi3):
	pt/l	LOCAL(udivsi3_dontadd), tr2
	pt/l	LOCAL(udivsi3_loop), tr1
	ptabs/l	r18, tr0
	movi	0, r0
	movi	1, r18
	addz.l	r5, r63, r19
	addz.l	r4, r63, r4
	shlli	r19, 31, r19
	shlli	r18, 31, r18
LOCAL(udivsi3_loop):
	bgtu	r19, r4, tr2
	or	r0, r18, r0
	sub	r4, r19, r4
LOCAL(udivsi3_dontadd):
	shlri	r18, 1, r18
	shlri	r19, 1, r19
	bnei	r18, 0, tr1
	blink	tr0, r63
#else
/* SHcompact version: 32 unrolled rotcl/div1 steps, one per quotient
   bit.  The labels shortway and vshortway sit where 16 and 8 steps
   remain; nothing in the visible text branches to them.  */
GLOBAL(udivsi3):
longway:
	mov	#0,r0
	div0u
	! get one bit from the msb of the numerator into the T
	! bit and divide it by whats in r5.  Put the answer bit
	! into the T bit so it can come out again at the bottom

	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0

	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
shortway:
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0

vshortway:
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4
ret:	rts
	mov	r4,r0		/* delay slot: quotient accumulated in r4 */

#endif /* ! __SHMEDIA__ */
#endif /* __SH4__ */
#endif

#ifdef L_set_fpscr
#if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
#ifdef __SH5__
	.mode	SHcompact
#endif
/* set_fpscr: load r4 into fpscr, then store two derived copies of that
   value into the 8-byte common block fpscr_values.  swap.w brings the
   upper half-word down so that the 8-bit immediates 8 and 16 address
   bits 19 and 20 of the original word; which copy lands at offset 0
   and which at offset 4 depends on __SH4__.  */
	.global GLOBAL(set_fpscr)
GLOBAL(set_fpscr):
	lds r4,fpscr
	mov.l LOCAL(set_fpscr_L1),r1
	swap.w r4,r0
	or #24,r0
#ifndef FMOVD_WORKS
	xor #16,r0
#endif
#if defined(__SH4__)
	swap.w r0,r3
	mov.l r3,@(4,r1)
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w r0,r2
	mov.l r2,@r1
#endif
#ifndef FMOVD_WORKS
	xor #8,r0
#else
	xor #24,r0
#endif
#if defined(__SH4__)
	swap.w r0,r2
	rts
	mov.l r2,@r1
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w r0,r3
	rts
	mov.l r3,@(4,r1)
#endif
	.align 2
LOCAL(set_fpscr_L1):
	.long GLOBAL(fpscr_values)
#ifdef __ELF__
        .comm   GLOBAL(fpscr_values),8,4
#else
        .comm   GLOBAL(fpscr_values),8
#endif /* ELF */
#endif /* SH3E / SH4 */
#endif /* L_set_fpscr */

#ifdef L_ic_invalidate
#if __SH5__ == 32
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(ic_invalidate)
GLOBAL(ic_invalidate):
	icbi	r0, 0
	ptabs	r18, tr0
	synci
	blink	tr0, r63
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
	.global GLOBAL(ic_invalidate)
GLOBAL(ic_invalidate):
	ocbwb	@r4
	mova	0f,r0
	mov.w	1f,r1
/* Compute how many cache lines 0f is away from r4.  */
	sub	r0,r4
	and	r1,r4
/* Prepare to branch to 0f plus the cache-line offset.  */
	add	# 0f - 1f,r4
	braf	r4
	nop
1:
	.short	0x1fe0
	.p2align 5
/* This must be aligned to the beginning of a cache line.  */
0:
	.rept	256 /* There are 256 cache lines of 32 bytes.  */
	rts
	.rept	15
	nop
	.endr
	.endr
#endif /* SH4 */
#endif /* L_ic_invalidate */

#if defined (__SH5__) && __SH5__ == 32
#ifdef L_shcompact_call_trampoline
	.section	.rodata
	.align	1
/* Dispatch table of 16-bit offsets relative to ct_main_label.  It is
   indexed by twice the nsb result computed at ct_loop; the base address
   loaded into r0 is biased by -31 * 2.  */
LOCAL(ct_main_table):
.word	LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
	.mode	SHmedia
	.section	.text..SHmedia32, "ax"
	.align	2

     /* This function loads 64-bit general-purpose registers from the
	stack, from a memory address contained in them or from an FP
	register, according to a cookie passed in r1.  Its execution
	time is linear on the number of registers that actually have
	to be copied.  See sh.h for details on the actual bit pattern.

	The function to be called is passed in r0.  If a 32-bit return
	value is expected, the actual function will be tail-called,
	otherwise the return address will be stored in r10 (that the
	caller should expect to be clobbered) and the return value
	will be expanded into r2/r3 upon return.  */

	.global	GLOBAL(GCC_shcompact_call_trampoline)
GLOBAL(GCC_shcompact_call_trampoline):
	ptabs/l	r0, tr0	/* Prepare to call the actual function.  */
	movi	((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
	pt/l	LOCAL(ct_loop), tr1
	addz.l	r1, r63, r1
	shori	((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
	/* Each handler below clears its own field of the cookie in r1 and
	   branches back here through tr1.  */
LOCAL(ct_loop):
	nsb	r1, r28
	shlli	r28, 1, r29
	ldx.w	r0, r29, r30
LOCAL(ct_main_label):
	ptrel/l	r30, tr2
	blink	tr2, r63
LOCAL(ct_r2_fp):	/* Copy r2 from an FP register.  */
	/* It must be dr0, so just do it.  */
	fmov.dq	dr0, r2
	movi	7, r30
	shlli	r30, 29, r31
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_r3_fp):	/* Copy r3 from an FP register.  */
	/* It is either dr0 or dr2.  */
	movi	7, r30
	shlri	r1, 26, r32
	shlli	r30, 26, r31
	andc	r1, r31, r1
	fmov.dq	dr0, r3
	beqi/l	r32, 4, tr1
	fmov.dq	dr2, r3
	blink	tr1, r63
LOCAL(ct_r4_fp):	/* Copy r4 from an FP register.  */
	/* The two-bit register selector, scaled by 8 (the size of one
	   fmov.dq/blink pair), picks an entry in the copy table.  */
	shlri	r1, 23 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
LOCAL(ct_r4_fp_base):
	ptrel/l	r32, tr2
	movi	7, r30
	shlli	r30, 23, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r4_fp_copy):
	fmov.dq	dr0, r4
	blink	tr1, r63
	fmov.dq	dr2, r4
	blink	tr1, r63
	fmov.dq	dr4, r4
	blink	tr1, r63
LOCAL(ct_r5_fp):	/* Copy r5 from an FP register.  */
	shlri	r1, 20 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
LOCAL(ct_r5_fp_base):
	ptrel/l	r32, tr2
	movi	7, r30
	shlli	r30, 20, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r5_fp_copy):
	fmov.dq	dr0, r5
	blink	tr1, r63
	fmov.dq	dr2, r5
	blink	tr1, r63
	fmov.dq	dr4, r5
	blink	tr1, r63
	fmov.dq	dr6, r5
	blink	tr1, r63
LOCAL(ct_r6_fph):	/* Copy r6 from a high FP register.  */
	/* It must be dr8.  */
	fmov.dq	dr8, r6
	movi	15, r30
	shlli	r30, 16, r31
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_r6_fpl):	/* Copy r6 from a low FP register.  */
	shlri	r1, 16 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
LOCAL(ct_r6_fp_base):
	ptrel/l	r32, tr2
	movi	7, r30
	shlli	r30, 16, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r6_fp_copy):
	fmov.dq	dr0, r6
	blink	tr1, r63
	fmov.dq	dr2, r6
	blink	tr1, r63
	fmov.dq	dr4, r6
	blink	tr1, r63
	fmov.dq	dr6, r6
	blink	tr1, r63
LOCAL(ct_r7_fph):	/* Copy r7 from a high FP register.  */
	/* It is either dr8 or dr10.  */
	movi	15 << 12, r31
	shlri	r1, 12, r32
	andc	r1, r31, r1
	fmov.dq	dr8, r7
	beqi/l	r32, 8, tr1
	fmov.dq	dr10, r7
	blink	tr1, r63
LOCAL(ct_r7_fpl):	/* Copy r7 from a low FP register.  */
	shlri	r1, 12 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
LOCAL(ct_r7_fp_base):
	ptrel/l	r32, tr2
	movi	7 << 12, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r7_fp_copy):
	fmov.dq	dr0, r7
	blink	tr1, r63
	fmov.dq	dr2, r7
	blink	tr1, r63
	fmov.dq	dr4, r7
	blink	tr1, r63
	fmov.dq	dr6, r7
	blink	tr1, r63
LOCAL(ct_r8_fph):	/* Copy r8 from a high FP register.  */
	/* It is either dr8 or dr10.  */
	movi	15 << 8, r31
	andi	r1, 1 << 8, r32
	andc	r1, r31, r1
	fmov.dq	dr8, r8
	beq/l	r32, r63, tr1
	fmov.dq	dr10, r8
	blink	tr1, r63
LOCAL(ct_r8_fpl):	/* Copy r8 from a low FP register.  */
	shlri	r1, 8 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
LOCAL(ct_r8_fp_base):
	ptrel/l	r32, tr2
	movi	7 << 8, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r8_fp_copy):
	fmov.dq	dr0, r8
	blink	tr1, r63
	fmov.dq	dr2, r8
	blink	tr1, r63
	fmov.dq	dr4, r8
	blink	tr1, r63
	fmov.dq	dr6, r8
	blink	tr1, r63
LOCAL(ct_r9_fph):	/* Copy r9 from a high FP register.  */
	/* It is either dr8 or dr10.  */
	movi	15 << 4, r31
	andi	r1, 1 << 4, r32
	andc	r1, r31, r1
	fmov.dq	dr8, r9
	beq/l	r32, r63, tr1
	fmov.dq	dr10, r9
	blink	tr1, r63
LOCAL(ct_r9_fpl):	/* Copy r9 from a low FP register.  */
	shlri	r1, 4 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
LOCAL(ct_r9_fp_base):
	ptrel/l	r32, tr2
	movi	7 << 4, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r9_fp_copy):
	fmov.dq	dr0, r9
	blink	tr1, r63
	fmov.dq	dr2, r9
	blink	tr1, r63
	fmov.dq	dr4, r9
	blink	tr1, r63
	fmov.dq	dr6, r9
	blink	tr1, r63
LOCAL(ct_r2_ld):	/* Copy r2 from a memory address.  */
	pt/l	LOCAL(ct_r2_load), tr2
	movi	3, r30
	shlli	r30, 29, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r2, 8, r3
	ldx.q	r2, r63, r2
	/* Fall through.  */
LOCAL(ct_r3_ld):	/* Copy r3 from a memory address.  */
	pt/l	LOCAL(ct_r3_load), tr2
	movi	3, r30
	shlli	r30, 26, r31

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -