📄 lib1funcs.asm
字号:
// Tail of __divsi3.  The entry sequence of this routine lies before this
// point; the instructions below are the reciprocal refinement and return.
        ;;
        setf.exp f11 = r2
        frcpa.s1 f10, p6 = f8, f9       // f10 = y0 ~ 1/f9
        ;;
(p6)    fmpy.s1 f8 = f8, f10            // q0 = a * y0
(p6)    fnma.s1 f9 = f9, f10, f1        // e0 = 1 - b * y0
        ;;
(p6)    fma.s1 f8 = f9, f8, f8          // q1 = q0 + e0 * q0
(p6)    fma.s1 f9 = f9, f9, f11         // e1 = e0 * e0 + f11
        ;;
(p6)    fma.s1 f10 = f9, f8, f8         // q2 = q1 + e1 * q1
        ;;
        fcvt.fx.trunc.s1 f10 = f10      // truncate to signed integer
        ;;
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __divsi3
#endif

#ifdef L__modsi3
// Compute a 32-bit integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// Outline: the quotient q is computed in floating point from a frcpa
// reciprocal approximation refined by three fma steps, truncated to an
// integer, and the remainder is formed as  dividend + q * (-divisor)
// with a single xma.l.  A zero divisor raises "break 1".
//
// Register use:
//   r2   biased exponent constant 0x0ffdd (0xffff - 34, i.e. 2^-34)
//   f13  dividend as an integer significand (kept for the final xma.l)
//   f8   dividend converted to floating point
//   f9   divisor as floating point, later reloaded with -divisor (integer)
//   f10  reciprocal approximation, then error term, then quotient
//   f11  correction constant built from r2
//   f12  running quotient estimate
//   p6   set by frcpa when the refinement steps must run
//   p7   set when the divisor is zero

        .text
        .align 16
        .global __modsi3
        .proc __modsi3
__modsi3:
        .regstk 2,0,0,0
        mov r2 = 0x0ffdd
        sxt4 in0 = in0                  // sign-extend both 32-bit operands
        sxt4 in1 = in1
        ;;
        setf.sig f13 = in0
        setf.sig f9 = in1
        // Check divide by zero.
        cmp.ne.unc p0,p7=0,in1          // p7 = (in1 == 0)
        ;;
        sub in1 = r0, in1               // in1 = -divisor, used by xma.l below
        fcvt.xf f8 = f13
        fcvt.xf f9 = f9
        ;;
        setf.exp f11 = r2
        frcpa.s1 f10, p6 = f8, f9       // f10 = y0 ~ 1/divisor
(p7)    break 1
        ;;
(p6)    fmpy.s1 f12 = f8, f10           // q0 = a * y0
(p6)    fnma.s1 f10 = f9, f10, f1       // e0 = 1 - b * y0
        ;;
        setf.sig f9 = in1               // f9 = -divisor (integer significand)
(p6)    fma.s1 f12 = f10, f12, f12      // q1 = q0 + e0 * q0
(p6)    fma.s1 f10 = f10, f10, f11      // e1 = e0 * e0 + f11
        ;;
(p6)    fma.s1 f10 = f10, f12, f12      // q2 = q1 + e1 * q1
        ;;
        fcvt.fx.trunc.s1 f10 = f10      // q = trunc(q2), signed
        ;;
        xma.l f10 = f10, f9, f13        // r = q * (-divisor) + dividend
        ;;
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __modsi3
#endif

#ifdef L__udivsi3
// Compute a 32-bit unsigned integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// Outline: both operands are zero-extended and converted to floating
// point, a frcpa reciprocal approximation is refined by three fma steps,
// and the result is truncated to an unsigned integer.  A zero divisor
// raises "break 1".
//
// Register use:
//   r2   biased exponent constant 0x0ffdd (0xffff - 34, i.e. 2^-34)
//   f8   dividend, then running quotient estimate
//   f9   divisor, then error term
//   f10  reciprocal approximation, then final quotient
//   f11  correction constant built from r2
//   p6   set by frcpa when the refinement steps must run
//   p7   set when the divisor is zero

        .text
        .align 16
        .global __udivsi3
        .proc __udivsi3
__udivsi3:
        .regstk 2,0,0,0
        mov r2 = 0x0ffdd
        zxt4 in0 = in0                  // zero-extend both 32-bit operands
        zxt4 in1 = in1
        ;;
        setf.sig f8 = in0
        setf.sig f9 = in1
        // Check divide by zero.
        cmp.ne.unc p0,p7=0,in1          // p7 = (in1 == 0)
        ;;
        fcvt.xf f8 = f8
        fcvt.xf f9 = f9
(p7)    break 1
        ;;
        setf.exp f11 = r2
        frcpa.s1 f10, p6 = f8, f9       // f10 = y0 ~ 1/divisor
        ;;
(p6)    fmpy.s1 f8 = f8, f10            // q0 = a * y0
(p6)    fnma.s1 f9 = f9, f10, f1        // e0 = 1 - b * y0
        ;;
(p6)    fma.s1 f8 = f9, f8, f8          // q1 = q0 + e0 * q0
(p6)    fma.s1 f9 = f9, f9, f11         // e1 = e0 * e0 + f11
        ;;
(p6)    fma.s1 f10 = f9, f8, f8         // q2 = q1 + e1 * q1
        ;;
        fcvt.fxu.trunc.s1 f10 = f10     // q = trunc(q2), unsigned
        ;;
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __udivsi3
#endif

#ifdef L__umodsi3
// Compute a 32-bit unsigned integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// Outline: same quotient computation as __udivsi3, followed by
// dividend + q * (-divisor)  in a single xma.l.  A zero divisor raises
// "break 1".
//
// Register use:
//   r2   biased exponent constant 0x0ffdd (0xffff - 34, i.e. 2^-34)
//   f13  dividend as an integer significand (kept for the final xma.l)
//   f8   dividend converted to floating point
//   f9   divisor as floating point, later reloaded with -divisor (integer)
//   f10  reciprocal approximation, then error term, then quotient
//   f11  correction constant built from r2
//   f12  running quotient estimate
//   p6   set by frcpa when the refinement steps must run
//   p7   set when the divisor is zero

        .text
        .align 16
        .global __umodsi3
        .proc __umodsi3
__umodsi3:
        .regstk 2,0,0,0
        mov r2 = 0x0ffdd
        zxt4 in0 = in0                  // zero-extend both 32-bit operands
        zxt4 in1 = in1
        ;;
        setf.sig f13 = in0
        setf.sig f9 = in1
        // Check divide by zero.
        cmp.ne.unc p0,p7=0,in1          // p7 = (in1 == 0)
        ;;
        sub in1 = r0, in1               // in1 = -divisor, used by xma.l below
        fcvt.xf f8 = f13
        fcvt.xf f9 = f9
        ;;
        setf.exp f11 = r2
        frcpa.s1 f10, p6 = f8, f9       // f10 = y0 ~ 1/divisor
(p7)    break 1
        ;;
(p6)    fmpy.s1 f12 = f8, f10           // q0 = a * y0
(p6)    fnma.s1 f10 = f9, f10, f1       // e0 = 1 - b * y0
        ;;
        setf.sig f9 = in1               // f9 = -divisor (integer significand)
(p6)    fma.s1 f12 = f10, f12, f12      // q1 = q0 + e0 * q0
(p6)    fma.s1 f10 = f10, f10, f11      // e1 = e0 * e0 + f11
        ;;
(p6)    fma.s1 f10 = f10, f12, f12      // q2 = q1 + e1 * q1
        ;;
        fcvt.fxu.trunc.s1 f10 = f10     // q = trunc(q2), unsigned
        ;;
        xma.l f10 = f10, f9, f13        // r = q * (-divisor) + dividend
        ;;
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __umodsi3
#endif

#ifdef L__save_stack_nonlocal
// Notes on save/restore stack nonlocal: We read ar.bsp but write
// ar.bspstore.  This is because ar.bsp can be read at all times
// (independent of the RSE mode) but since it's read-only we need to
// restore the value via ar.bspstore.  This is OK because
// ar.bsp==ar.bspstore after executing "flushrs".

// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
//
// Layout written to save_area (byte offsets, as produced by the stores
// below):
//    0  stack_pointer (in1)
//    8  ar.bsp
//   16  ar.rnat
//   24  ar.pfs as returned by alloc
//
// ar.rsc is masked with 0x1c while ar.bsp / ar.rnat are read and is
// rewritten with the low two bits set (0x3) before returning.

        .text
        .align 16
        .global __ia64_save_stack_nonlocal
        .proc __ia64_save_stack_nonlocal
__ia64_save_stack_nonlocal:
        { .mmf
          alloc r18 = ar.pfs, 2, 0, 0, 0
          mov r19 = ar.rsc
          ;;
        }
        { .mmi
          flushrs
          st8 [in0] = in1, 24           // save_area[0] = sp; in0 -> +24
          and r19 = 0x1c, r19           // clear the low mode bits of ar.rsc
          ;;
        }
        { .mmi
          st8 [in0] = r18, -16          // save_area[24] = pfs; in0 -> +8
          mov ar.rsc = r19
          or r19 = 0x3, r19             // value to put back in ar.rsc on exit
          ;;
        }
        { .mmi
          mov r16 = ar.bsp
          mov r17 = ar.rnat
          adds r2 = 8, in0              // r2 -> save_area + 16
          ;;
        }
        { .mmi
          st8 [in0] = r16               // save_area[8]  = bsp
          st8 [r2] = r17                // save_area[16] = rnat
        }
        { .mib
          mov ar.rsc = r19
          br.ret.sptk.few rp
          ;;
        }
        .endp __ia64_save_stack_nonlocal
#endif

#ifdef L__nonlocal_goto
// void __ia64_nonlocal_goto(void *target_label, void *save_area,
//                           void *static_chain);
//
// Reloads sp (r12), ar.bspstore, ar.rnat and ar.pfs from save_area (same
// layout as written by __ia64_save_stack_nonlocal), places static_chain
// in r15, and returns to target_label by loading it into rp.

        .text
        .align 16
        .global __ia64_nonlocal_goto
        .proc __ia64_nonlocal_goto
__ia64_nonlocal_goto:
        { .mmi
          alloc r20 = ar.pfs, 3, 0, 0, 0
          ld8 r12 = [in1], 8            // sp = save_area[0]
          mov.ret.sptk rp = in0, .L0    // rp = target_label
          ;;
        }
        { .mmf
          ld8 r16 = [in1], 8            // r16 = saved bsp
          mov r19 = ar.rsc
          ;;
        }
        { .mmi
          flushrs
          ld8 r17 = [in1], 8            // r17 = saved rnat
          and r19 = 0x1c, r19           // clear the low mode bits of ar.rsc
          ;;
        }
        { .mmi
          ld8 r18 = [in1]               // r18 = saved pfs
          mov ar.rsc = r19
          or r19 = 0x3, r19             // value to put back in ar.rsc on exit
          ;;
        }
        { .mmi
          mov ar.bspstore = r16
          ;;
          mov ar.rnat = r17
          ;;
        }
        { .mmi
          loadrs
          invala
          mov r15 = in2                 // static chain for the target
          ;;
        }
.L0:    { .mib
          mov ar.rsc = r19
          mov ar.pfs = r18
          br.ret.sptk.few rp
          ;;
        }
        .endp __ia64_nonlocal_goto
#endif

#ifdef L__restore_stack_nonlocal
// This is mostly the same as nonlocal_goto above.
// ??? This has not been tested yet.

// void __ia64_restore_stack_nonlocal(void *save_area)
//
// Reloads sp (r12), ar.bspstore, ar.rnat and ar.pfs from save_area and
// returns to the caller.
//
// NOTE(review): alloc requests 4 input registers although the prototype
// above has one argument, and the .L0 label is not referenced inside
// this routine.  Both are kept as found -- confirm before changing.

        .text
        .align 16
        .global __ia64_restore_stack_nonlocal
        .proc __ia64_restore_stack_nonlocal
__ia64_restore_stack_nonlocal:
        { .mmf
          alloc r20 = ar.pfs, 4, 0, 0, 0
          ld8 r12 = [in0], 8            // sp = save_area[0]
          ;;
        }
        { .mmb
          ld8 r16=[in0], 8              // r16 = saved bsp
          mov r19 = ar.rsc
          ;;
        }
        { .mmi
          flushrs
          ld8 r17 = [in0], 8            // r17 = saved rnat
          and r19 = 0x1c, r19           // clear the low mode bits of ar.rsc
          ;;
        }
        { .mmf
          ld8 r18 = [in0]               // r18 = saved pfs
          mov ar.rsc = r19
          ;;
        }
        { .mmi
          mov ar.bspstore = r16
          ;;
          mov ar.rnat = r17
          or r19 = 0x3, r19             // value to put back in ar.rsc on exit
          ;;
        }
        { .mmf
          loadrs
          invala
          ;;
        }
.L0:    { .mib
          mov ar.rsc = r19
          mov ar.pfs = r18
          br.ret.sptk.few rp
          ;;
        }
        .endp __ia64_restore_stack_nonlocal
#endif

#ifdef L__trampoline
// Implement the nested function trampoline.  This is out of line
// so that we don't have to bother with flushing the icache, as
// well as making the on-stack trampoline smaller.
//
// The trampoline has the following form:
//
//              +-------------------+ >
//      TRAMP:  | __ia64_trampoline | |
//              +-------------------+  > fake function descriptor
//              | TRAMP+16          | |
//              +-------------------+ >
//              | target descriptor |
//              +-------------------+
//              | static link       |
//              +-------------------+
//
// The code below reads two words through r1: the first (the "target
// descriptor" slot in the diagram) into r2 and the second (the "static
// link" slot) into r15.  It then reads two words through r2: the first
// becomes the branch target (via b6) and the second is loaded into r1.

        .text
        .align 16
        .global __ia64_trampoline
        .proc __ia64_trampoline
__ia64_trampoline:
        { .mmi
          ld8 r2 = [r1], 8              // r2 = address of target descriptor
          ;;
          ld8 r15 = [r1]                // r15 = static link
        }
        { .mmi
          ld8 r3 = [r2], 8              // r3 = first word of target descriptor
          ;;
          ld8 r1 = [r2]                 // r1 = second word of target descriptor
          mov b6 = r3
        }
        { .bbb
          br.sptk.many b6
          ;;
        }
        .endp __ia64_trampoline
#endif

// Thunks for backward compatibility.
// Each thunk is a single unconditional branch from the old symbol name
// to its replacement.
#ifdef L_fixtfdi
        .text
        .align 16
        .global __fixtfti
        .proc __fixtfti
__fixtfti:
        { .bbb
          br.sptk.many __fixxfti
          ;;
        }
        .endp __fixtfti
#endif
#ifdef L_fixunstfdi
        .text
        .align 16
        .global __fixunstfti
        .proc __fixunstfti
__fixunstfti:
        { .bbb
          br.sptk.many __fixunsxfti
          ;;
        }
        .endp __fixunstfti
#endif
#ifdef L_floatditf
        .text
        .align 16
        .global __floattitf
        .proc __floattitf
__floattitf:
        { .bbb
          br.sptk.many __floattixf
          ;;
        }
        .endp __floattitf
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -