📄 lib1funcs.asm
字号:
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
!!
!! Unsigned 32-bit divide carried out in the FPU.  Both operands get
!! their top bit flipped (xor with r1, which holds 0x80000000 after the
!! rotr), are converted with float, and then have the constant stored
!! at L1 (2147483648.0) added back before the fdiv.  ftrc leaves the
!! truncated quotient in fpul.  A divisor of 0 or 1 takes the trivial
!! path, which returns the dividend unchanged.

	.global GLOBAL(udivsi3_i4)
GLOBAL(udivsi3_i4):
	mov #1,r1
	cmp/hi r1,r5
	bf trivial		! divisor is not above 1 (unsigned compare)
	rotr r1			! r1 = 0x80000000
	xor r1,r4
	lds r4,fpul
	mova L1,r0
#ifdef FMOVD_WORKS
	fmov.d @r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
	fmov.s @r0+,fr5
	fmov.s @r0,fr4
#else
	fmov.s @r0+,fr4
	fmov.s @r0,fr5
#endif
#endif
	float fpul,dr0
	xor r1,r5
	lds r5,fpul
	float fpul,dr2
	fadd dr4,dr0
	fadd dr4,dr2
	fdiv dr2,dr0
	rts
	ftrc dr0,fpul		! executed in the rts delay slot

trivial:
	rts
	lds r4,fpul		! delay slot: result = dividend

	.align 2
#ifdef FMOVD_WORKS
	.align 3 ! make double below 8 byte aligned.
#endif
L1:
	.double 2147483648

#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
!!
!! Same computation as the variant above, except that fpscr is pushed
!! on the stack, replaced by the word stored at L1 while the divide
!! runs, and popped again in the rts delay slot.  The trivial path
!! branches away before fpscr is touched.
!! NOTE(review): 0x80000 / 0x180000 presumably select the FPSCR PR bit
!! (plus SZ when FMOVD_WORKS) -- confirm against the SH-4 manual.

#if ! __SH5__ || __SH5__ == 32
#if __SH5__
	.mode SHcompact
#endif
	.global GLOBAL(udivsi3_i4)
GLOBAL(udivsi3_i4):
	mov #1,r1
	cmp/hi r1,r5
	bf trivial
	sts.l fpscr,@-r15	! save the caller fpscr
	mova L1,r0
	lds.l @r0+,fpscr	! load the fpscr word at L1, r0 now points at the double
	rotr r1
	xor r1,r4
	lds r4,fpul
#ifdef FMOVD_WORKS
	fmov.d @r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
	fmov.s @r0+,fr5
	fmov.s @r0,fr4
#else
	fmov.s @r0+,fr4
	fmov.s @r0,fr5
#endif
#endif
	float fpul,dr0
	xor r1,r5
	lds r5,fpul
	float fpul,dr2
	fadd dr4,dr0
	fadd dr4,dr2
	fdiv dr2,dr0
	ftrc dr0,fpul
	rts
	lds.l @r15+,fpscr	! delay slot: restore the caller fpscr

#ifdef FMOVD_WORKS
	.align 3 ! make double below 8 byte aligned.
#endif
trivial:
	rts
	lds r4,fpul		! delay slot: result = dividend

	.align 2
L1:
#ifndef FMOVD_WORKS
	.long 0x80000
#else
	.long 0x180000
#endif
	.double 2147483648

#endif /* ! __SH5__ || __SH5__ == 32 */
#endif /* ! __SH4__ */
#endif

#ifdef L_udivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh3e code.  */
#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!
!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
	.global GLOBAL(udivsi3)
#if __SHMEDIA__
#if __SH5__ == 32
	.section .text..SHmedia32,"ax"
#else
	.text
#endif
	.align 2
/* The assembly code that follows is a hand-optimized version of the C
   code that follows.  Note that the registers that are modified are
   exactly those listed as clobbered in the patterns udivsi3_i1 and
   udivsi3_i1_media.

unsigned
__udivsi3 (i, j)
    unsigned i, j;
{
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r18 asm ("r18") = 1;
  register unsigned long long r4 asm ("r4") = i;
  register unsigned long long r19 asm ("r19") = j;

  r19 <<= 31;
  r18 <<= 31;
  do
    if (r4 >= r19)
      r0 |= r18, r4 -= r19;
  while (r19 >>= 1, r18 >>= 1);

  return r0;
}
*/
GLOBAL(udivsi3):
	pt/l LOCAL(udivsi3_dontadd), tr2
	pt/l LOCAL(udivsi3_loop), tr1
	ptabs/l r18, tr0	/* tr0 = return target, taken from r18 before r18 is reused. */
	movi 0, r0
	movi 1, r18
	addz.l r5, r63, r19
	addz.l r4, r63, r4
	shlli r19, 31, r19
	shlli r18, 31, r18
LOCAL(udivsi3_loop):
	bgtu r19, r4, tr2	/* Skip the subtract when r19 > r4. */
	or r0, r18, r0
	sub r4, r19, r4
LOCAL(udivsi3_dontadd):
	shlri r18, 1, r18
	shlri r19, 1, r19
	bnei r18, 0, tr1	/* Loop until the quotient-bit mask has shifted out. */
	blink tr0, r63
#else
GLOBAL(udivsi3):
longway:
	mov #0,r0
	div0u
	! get one bit from the msb of the numerator into the T
	! bit and divide it by whats in r5.  Put the answer bit
	! into the T bit so it can come out again at the bottom

	! 16 + 8 + 8 = 32 rotcl/div1 steps, then one last rotcl.
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0

	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0

shortway:
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0

vshortway:
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4 ; div1 r5,r0
	rotcl r4
ret:	rts
	mov r4,r0		! delay slot: copy r4 into the result register r0

#endif /* ! __SHMEDIA__ */
#endif /* __SH4__ */
#endif

#ifdef L_set_fpscr
#if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
#ifdef __SH5__
	.mode SHcompact
#endif
/* set_fpscr: load fpscr from r4, then store two derived copies of that
   value into the two words of fpscr_values (offsets 0 and 4; which
   copy goes where depends on __SH4__).  swap.w exchanges the 16-bit
   halves, so the 8-bit immediates 8 and 16 act on bits 19 and 20 of
   the original value.  Uses r0-r3 as scratch.
   NOTE(review): bits 19 and 20 are presumably the FPSCR PR and SZ
   fields -- confirm against the SH-4 manual.  */
	.global GLOBAL(set_fpscr)
GLOBAL(set_fpscr):
	lds r4,fpscr
	mov.l LOCAL(set_fpscr_L1),r1	/* r1 = address of fpscr_values. */
	swap.w r4,r0
	or #24,r0
#ifndef FMOVD_WORKS
	xor #16,r0
#endif
#if defined(__SH4__)
	swap.w r0,r3
	mov.l r3,@(4,r1)
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w r0,r2
	mov.l r2,@r1
#endif
#ifndef FMOVD_WORKS
	xor #8,r0
#else
	xor #24,r0
#endif
#if defined(__SH4__)
	swap.w r0,r2
	rts
	mov.l r2,@r1		/* Delay slot: store the second copy. */
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w r0,r3
	rts
	mov.l r3,@(4,r1)	/* Delay slot: store the second copy. */
#endif
	.align 2
LOCAL(set_fpscr_L1):
	.long GLOBAL(fpscr_values)
#ifdef __ELF__
	.comm GLOBAL(fpscr_values),8,4
#else
	.comm GLOBAL(fpscr_values),8
#endif /* ELF */
#endif /* SH3E / SH4 */
#endif /* L_set_fpscr */

#ifdef L_ic_invalidate
#if __SH5__ == 32
	.mode SHmedia
	.section .text..SHmedia32,"ax"
	.align 2
	.global GLOBAL(ic_invalidate)
GLOBAL(ic_invalidate):
	icbi r0, 0
	ptabs r18, tr0
	synci
	blink tr0, r63
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
	.global GLOBAL(ic_invalidate)
GLOBAL(ic_invalidate):
	ocbwb @r4
	mova 0f,r0
	mov.w 1f,r1		/* r1 = 0x1fe0, the mask applied below. */
/* Compute how many cache lines 0f is away from r4. */
	sub r0,r4
	and r1,r4
/* Prepare to branch to 0f plus the cache-line offset. */
	add # 0f - 1f,r4
	braf r4
	nop			/* Fills the braf delay slot. */
1:
	.short 0x1fe0
	.p2align 5
/* This must be aligned to the beginning of a cache line. */
0:
	.rept 256 /* There are 256 cache lines of 32 bytes. */
	rts
	.rept 15
	nop
	.endr
	.endr
#endif /* SH4 */
#endif /* L_ic_invalidate */

#if defined (__SH5__) && __SH5__ == 32
#ifdef L_shcompact_call_trampoline
	.section .rodata
	.align 1
/* Dispatch table for the trampoline below: each entry is the 16-bit
   distance from ct_main_label to one handler.  */
LOCAL(ct_main_table):
.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
	.mode SHmedia
	.section .text..SHmedia32, "ax"
	.align 2

	/* This function loads 64-bit general-purpose registers from the
	stack, from a memory address contained in them or from an FP
	register, according to a cookie passed in r1.  Its execution
	time is linear on the number of registers that actually have
	to be copied.  See sh.h for details on the actual bit pattern.

	The function to be called is passed in r0.  If a 32-bit return
	value is expected, the actual function will be tail-called,
	otherwise the return address will be stored in r10 (that the
	caller should expect to be clobbered) and the return value
	will be expanded into r2/r3 upon return.  */

	.global GLOBAL(GCC_shcompact_call_trampoline)
GLOBAL(GCC_shcompact_call_trampoline):
	ptabs/l r0, tr0 /* Prepare to call the actual function. */
	movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
	pt/l LOCAL(ct_loop), tr1
	addz.l r1, r63, r1
	shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
	/* Dispatch: load the 16-bit table entry at r0 + 2 * nsb (r1) and
	   branch that far from ct_main_label.  Each handler clears its
	   field of the cookie in r1 (andc) and comes back here through
	   tr1.  */
LOCAL(ct_loop):
	nsb r1, r28
	shlli r28, 1, r29
	ldx.w r0, r29, r30
LOCAL(ct_main_label):
	ptrel/l r30, tr2
	blink tr2, r63
LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
	/* It must be dr0, so just do it. */
	fmov.dq dr0, r2
	movi 7, r30
	shlli r30, 29, r31
	andc r1, r31, r1
	blink tr1, r63
LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
	/* It is either dr0 or dr2. */
	movi 7, r30
	shlri r1, 26, r32
	shlli r30, 26, r31
	andc r1, r31, r1
	fmov.dq dr0, r3
	beqi/l r32, 4, tr1
	fmov.dq dr2, r3
	blink tr1, r63
LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
	/* r33 = (cookie field & 3) * 8 selects one of the 8-byte
	   fmov.dq/blink pairs at ct_r4_fp_copy.  The ct_rN_fp and
	   ct_rN_fpl handlers below use the same scheme.  */
	shlri r1, 23 - 3, r34
	andi r34, 3 << 3, r33
	addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
LOCAL(ct_r4_fp_base):
	ptrel/l r32, tr2
	movi 7, r30
	shlli r30, 23, r31
	andc r1, r31, r1
	blink tr2, r63
LOCAL(ct_r4_fp_copy):
	fmov.dq dr0, r4
	blink tr1, r63
	fmov.dq dr2, r4
	blink tr1, r63
	fmov.dq dr4, r4
	blink tr1, r63
LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
	shlri r1, 20 - 3, r34
	andi r34, 3 << 3, r33
	addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
LOCAL(ct_r5_fp_base):
	ptrel/l r32, tr2
	movi 7, r30
	shlli r30, 20, r31
	andc r1, r31, r1
	blink tr2, r63
LOCAL(ct_r5_fp_copy):
	fmov.dq dr0, r5
	blink tr1, r63
	fmov.dq dr2, r5
	blink tr1, r63
	fmov.dq dr4, r5
	blink tr1, r63
	fmov.dq dr6, r5
	blink tr1, r63
LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
	/* It must be dr8. */
	fmov.dq dr8, r6
	movi 15, r30
	shlli r30, 16, r31
	andc r1, r31, r1
	blink tr1, r63
LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
	shlri r1, 16 - 3, r34
	andi r34, 3 << 3, r33
	addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
LOCAL(ct_r6_fp_base):
	ptrel/l r32, tr2
	movi 7, r30
	shlli r30, 16, r31
	andc r1, r31, r1
	blink tr2, r63
LOCAL(ct_r6_fp_copy):
	fmov.dq dr0, r6
	blink tr1, r63
	fmov.dq dr2, r6
	blink tr1, r63
	fmov.dq dr4, r6
	blink tr1, r63
	fmov.dq dr6, r6
	blink tr1, r63
LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
	/* It is either dr8 or dr10. */
	movi 15 << 12, r31
	shlri r1, 12, r32
	andc r1, r31, r1
	fmov.dq dr8, r7
	beqi/l r32, 8, tr1
	fmov.dq dr10, r7
	blink tr1, r63
LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
	shlri r1, 12 - 3, r34
	andi r34, 3 << 3, r33
	addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
LOCAL(ct_r7_fp_base):
	ptrel/l r32, tr2
	movi 7 << 12, r31
	andc r1, r31, r1
	blink tr2, r63
LOCAL(ct_r7_fp_copy):
	fmov.dq dr0, r7
	blink tr1, r63
	fmov.dq dr2, r7
	blink tr1, r63
	fmov.dq dr4, r7
	blink tr1, r63
	fmov.dq dr6, r7
	blink tr1, r63
LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
	/* It is either dr8 or dr10. */
	movi 15 << 8, r31
	andi r1, 1 << 8, r32
	andc r1, r31, r1
	fmov.dq dr8, r8
	beq/l r32, r63, tr1
	fmov.dq dr10, r8
	blink tr1, r63
LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
	shlri r1, 8 - 3, r34
	andi r34, 3 << 3, r33
	addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
LOCAL(ct_r8_fp_base):
	ptrel/l r32, tr2
	movi 7 << 8, r31
	andc r1, r31, r1
	blink tr2, r63
LOCAL(ct_r8_fp_copy):
	fmov.dq dr0, r8
	blink tr1, r63
	fmov.dq dr2, r8
	blink tr1, r63
	fmov.dq dr4, r8
	blink tr1, r63
	fmov.dq dr6, r8
	blink tr1, r63
LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
	/* It is either dr8 or dr10. */
	movi 15 << 4, r31
	andi r1, 1 << 4, r32
	andc r1, r31, r1
	fmov.dq dr8, r9
	beq/l r32, r63, tr1
	fmov.dq dr10, r9
	blink tr1, r63
LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
	shlri r1, 4 - 3, r34
	andi r34, 3 << 3, r33
	addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
LOCAL(ct_r9_fp_base):
	ptrel/l r32, tr2
	movi 7 << 4, r31
	andc r1, r31, r1
	blink tr2, r63
LOCAL(ct_r9_fp_copy):
	fmov.dq dr0, r9
	blink tr1, r63
	fmov.dq dr2, r9
	blink tr1, r63
	fmov.dq dr4, r9
	blink tr1, r63
	fmov.dq dr6, r9
	blink tr1, r63
LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
	pt/l LOCAL(ct_r2_load), tr2
	movi 3, r30
	shlli r30, 29, r31
	and r1, r31, r32
	andc r1, r31, r1
	beq/l r31, r32, tr2
	addi.l r2, 8, r3
	ldx.q r2, r63, r2
	/* Fall through. */
LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
	pt/l LOCAL(ct_r3_load), tr2
	movi 3, r30
	shlli r30, 26, r31
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -