📄 lib1funcs.asm
字号:
/* Copyright (C) 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com>.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

/* As a special exception, if you link this library with other files,
   some of which are compiled with GCC, to produce an executable,
   this library does not by itself cause the resulting executable
   to be covered by the GNU General Public License.
   This exception does not however invalidate any other reasons why
   the executable file might be covered by the GNU General Public License.  */

// Notation used in the step comments of every routine below:
//   a = dividend, b = divisor, y = running estimate of 1/b,
//   q = running estimate of a/b, e = 1 - b*y, r = a - b*q.
// fma  d = x, y, z  computes  x*y + z;  fnma d = x, y, z  computes  -(x*y) + z;
// fmpy d = x, y     computes  x*y.  f0 and f1 read as 0.0 and 1.0.
// A ";;" ends an instruction group; instruction order and the position of
// every ";;" are part of the hand scheduling and must not be changed.

#ifdef L__divxf3
// Compute an 80-bit IEEE double-extended quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend.  farg1 holds the divisor.
// The quotient is returned in fret0.
//
// __divtf3 is an alternate symbol name for backward compatibility.
//
// Predicates: p6 is produced by frcpa and guards the whole refinement
// sequence; p7 is made the complement of p6 and guards the path that
// returns frcpa's f10 unchanged.
// Scratch FP registers written: f10-f14.

	.text
	.align 16
	.global __divxf3
	.global __divtf3
	.proc __divxf3
__divxf3:
__divtf3:
	cmp.eq p7, p0 = r0, r0			// p7 = 1 (r0 == r0 always holds)
	frcpa.s0 f10, p6 = farg0, farg1		// f10 = y0, p6 = refinement needed
	;;
(p6)	cmp.ne p7, p0 = r0, r0			// p6 set => clear p7, so p7 = !p6
	.pred.rel.mutex p6, p7
(p6)	fnma.s1 f11 = farg1, f10, f1		// f11 = e  = 1 - b*y0
(p6)	fma.s1 f12 = farg0, f10, f0		// f12 = q0 = a*y0
	;;
(p6)	fma.s1 f13 = f11, f11, f0		// f13 = e^2
(p6)	fma.s1 f14 = f11, f11, f11		// f14 = e^2 + e
	;;
(p6)	fma.s1 f11 = f13, f13, f11		// f11 = e^4 + e
(p6)	fma.s1 f13 = f14, f10, f10		// f13 = y1 = y0 + (e^2 + e)*y0
	;;
(p6)	fma.s1 f10 = f13, f11, f10		// f10 = y2 = y0 + y1*(e^4 + e)
(p6)	fnma.s1 f11 = farg1, f12, farg0		// f11 = r0 = a - b*q0
	;;
(p6)	fma.s1 f11 = f11, f10, f12		// f11 = q1 = q0 + r0*y2
(p6)	fnma.s1 f12 = farg1, f10, f1		// f12 = e' = 1 - b*y2
	;;
(p6)	fma.s1 f10 = f12, f10, f10		// f10 = y3 = y2 + e'*y2
(p6)	fnma.s1 f12 = farg1, f11, farg0		// f12 = r1 = a - b*q1
	;;
(p6)	fma.s0 fret0 = f12, f10, f11		// result = q1 + r1*y3
(p7)	mov fret0 = f10				// no refinement: return frcpa's f10
	br.ret.sptk rp
	.endp __divxf3
#endif

#ifdef L__divdf3
// Compute a 64-bit IEEE double quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend.  farg1 holds the divisor.
// The quotient is returned in fret0.
//
// Predicates: p6 (from frcpa) guards the refinement; p7 = !p6 guards the
// path that returns frcpa's f10 unchanged.
// Scratch FP registers written: f8, f10-f13.

	.text
	.align 16
	.global __divdf3
	.proc __divdf3
__divdf3:
	cmp.eq p7, p0 = r0, r0			// p7 = 1 (r0 == r0 always holds)
	frcpa.s0 f10, p6 = farg0, farg1		// f10 = y0, p6 = refinement needed
	;;
(p6)	cmp.ne p7, p0 = r0, r0			// p6 set => clear p7, so p7 = !p6
	.pred.rel.mutex p6, p7
(p6)	fmpy.s1 f11 = farg0, f10		// f11 = q0 = a*y0
(p6)	fnma.s1 f12 = farg1, f10, f1		// f12 = e  = 1 - b*y0
	;;
(p6)	fma.s1 f11 = f12, f11, f11		// f11 = q1 = q0 + e*q0
(p6)	fmpy.s1 f13 = f12, f12			// f13 = e^2
	;;
(p6)	fma.s1 f10 = f12, f10, f10		// f10 = y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f11, f11		// f11 = q2 = q1 + e^2*q1
	;;
(p6)	fmpy.s1 f12 = f13, f13			// f12 = e^4
(p6)	fma.s1 f10 = f13, f10, f10		// f10 = y2 = y1 + e^2*y1
	;;
(p6)	fma.d.s1 f11 = f12, f11, f11		// f11 = q3 = q2 + e^4*q2  (.d form)
(p6)	fma.s1 f10 = f12, f10, f10		// f10 = y3 = y2 + e^4*y2
	;;
(p6)	fnma.d.s1 f8 = farg1, f11, farg0	// f8  = r = a - b*q3      (.d form)
	;;
(p6)	fma.d fret0 = f8, f10, f11		// result = q3 + r*y3
(p7)	mov fret0 = f10				// no refinement: return frcpa's f10
	br.ret.sptk rp
	;;
	.endp __divdf3
#endif

#ifdef L__divsf3
// Compute a 32-bit IEEE float quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend.  farg1 holds the divisor.
// The quotient is returned in fret0.
//
// Predicates: p6 (from frcpa) guards the refinement; p7 = !p6 guards the
// path that returns frcpa's f10 unchanged.
// Scratch FP registers written: f8-f10.

	.text
	.align 16
	.global __divsf3
	.proc __divsf3
__divsf3:
	cmp.eq p7, p0 = r0, r0			// p7 = 1 (r0 == r0 always holds)
	frcpa.s0 f10, p6 = farg0, farg1		// f10 = y0, p6 = refinement needed
	;;
(p6)	cmp.ne p7, p0 = r0, r0			// p6 set => clear p7, so p7 = !p6
	.pred.rel.mutex p6, p7
(p6)	fmpy.s1 f8 = farg0, f10			// f8 = q0 = a*y0
(p6)	fnma.s1 f9 = farg1, f10, f1		// f9 = e  = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8			// f8 = q1 = q0 + e*q0
(p6)	fmpy.s1 f9 = f9, f9			// f9 = e^2
	;;
(p6)	fma.s1 f8 = f9, f8, f8			// f8 = q2 = q1 + e^2*q1
(p6)	fmpy.s1 f9 = f9, f9			// f9 = e^4
	;;
(p6)	fma.d.s1 f10 = f9, f8, f8		// f10 = q3 = q2 + e^4*q2  (.d form)
	;;
(p6)	fnorm.s.s0 fret0 = f10			// result = q3 normalized with the .s form
(p7)	mov fret0 = f10				// no refinement: return frcpa's f10
	br.ret.sptk rp
	;;
	.endp __divsf3
#endif

#ifdef L__divdi3
// Compute a 64-bit integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.
// The quotient is returned in ret0.
//
// A zero divisor executes "break 1" (guarded by p7).
// Scratch FP registers written: f8-f13.

	.text
	.align 16
	.global __divdi3
	.proc __divdi3
__divdi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	// Check divide by zero.  p7 receives the complement of (0 != in1),
	// i.e. p7 is set exactly when the divisor is zero.
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, so that they won't be treated as unsigned.
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9		// f10 = y0
	;;
	// 3 Newton-Raphson iterations.
(p6)	fnma.s1 f11 = f9, f10, f1		// f11 = e  = 1 - b*y0
(p6)	fmpy.s1 f12 = f8, f10			// f12 = q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11			// f13 = e^2
(p6)	fma.s1 f12 = f11, f12, f12		// f12 = q1 = q0 + e*q0
	;;
(p6)	fma.s1 f10 = f11, f10, f10		// f10 = y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12		// f11 = q2 = q1 + e^2*q1
	;;
(p6)	fma.s1 f10 = f13, f10, f10		// f10 = y2 = y1 + e^2*y1
(p6)	fnma.s1 f12 = f9, f11, f8		// f12 = r  = a - b*q2
	;;
(p6)	fma.s1 f10 = f12, f10, f11		// f10 = q3 = q2 + r*y2
	;;
	// Round quotient to an integer.
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __divdi3
#endif

#ifdef L__moddi3
// Compute a 64-bit integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend (a).  in1 holds the divisor (b).
// The remainder is returned in ret0.
//
// A zero divisor executes "break 1" (guarded by p7).
// f14 keeps the integer image of a for the final multiply-add; in1 is
// overwritten with -b.
// Scratch FP registers written: f8-f14.

	.text
	.align 16
	.global __moddi3
	.proc __moddi3
__moddi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.
	setf.sig f14 = in0
	setf.sig f9 = in1
	// Check divide by zero.  p7 receives the complement of (0 != in1),
	// i.e. p7 is set exactly when the divisor is zero.
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, so that they won't be treated as unsigned.
	fcvt.xf f8 = f14
	fcvt.xf f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9		// f10 = y0
	;;
	// 3 Newton-Raphson iterations.
(p6)	fmpy.s1 f12 = f8, f10			// f12 = q0 = a*y0
(p6)	fnma.s1 f11 = f9, f10, f1		// f11 = e  = 1 - b*y0
	;;
(p6)	fma.s1 f12 = f11, f12, f12		// f12 = q1 = q0 + e*q0
(p6)	fmpy.s1 f13 = f11, f11			// f13 = e^2
	;;
(p6)	fma.s1 f10 = f11, f10, f10		// f10 = y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12		// f11 = q2 = q1 + e^2*q1
	;;
	sub in1 = r0, in1			// in1 = -b (integer)
(p6)	fma.s1 f10 = f13, f10, f10		// f10 = y2 = y1 + e^2*y1
(p6)	fnma.s1 f12 = f9, f11, f8		// f12 = r  = a - b*q2 (last FP use of f9 as b)
	;;
	setf.sig f9 = in1			// f9 = integer image of -b
(p6)	fma.s1 f10 = f12, f10, f11		// f10 = q3 = q2 + r*y2
	;;
	fcvt.fx.trunc.s1 f10 = f10		// truncate quotient to a signed integer
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f14
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __moddi3
#endif

#ifdef L__udivdi3
// Compute a 64-bit unsigned integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.
// The quotient is returned in ret0.
//
// A zero divisor executes "break 1" (guarded by p7).
// Scratch FP registers written: f8-f13.

	.text
	.align 16
	.global __udivdi3
	.proc __udivdi3
__udivdi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	// Check divide by zero.  p7 receives the complement of (0 != in1),
	// i.e. p7 is set exactly when the divisor is zero.
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, to avoid FP software-assist faults.
	fcvt.xuf.s1 f8 = f8
	fcvt.xuf.s1 f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9		// f10 = y0
	;;
	// 3 Newton-Raphson iterations.
(p6)	fnma.s1 f11 = f9, f10, f1		// f11 = e  = 1 - b*y0
(p6)	fmpy.s1 f12 = f8, f10			// f12 = q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11			// f13 = e^2
(p6)	fma.s1 f12 = f11, f12, f12		// f12 = q1 = q0 + e*q0
	;;
(p6)	fma.s1 f10 = f11, f10, f10		// f10 = y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12		// f11 = q2 = q1 + e^2*q1
	;;
(p6)	fma.s1 f10 = f13, f10, f10		// f10 = y2 = y1 + e^2*y1
(p6)	fnma.s1 f12 = f9, f11, f8		// f12 = r  = a - b*q2
	;;
(p6)	fma.s1 f10 = f12, f10, f11		// f10 = q3 = q2 + r*y2
	;;
	// Round quotient to an unsigned integer.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __udivdi3
#endif

#ifdef L__umoddi3
// Compute a 64-bit unsigned integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend (a).  in1 holds the divisor (b).
// The remainder is returned in ret0.
//
// A zero divisor executes "break 1" (guarded by p7).
// f14 keeps the integer image of a for the final multiply-add; in1 is
// overwritten with -b.
// Scratch FP registers written: f8-f14.

	.text
	.align 16
	.global __umoddi3
	.proc __umoddi3
__umoddi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.
	setf.sig f14 = in0
	setf.sig f9 = in1
	// Check divide by zero.  p7 receives the complement of (0 != in1),
	// i.e. p7 is set exactly when the divisor is zero.
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, to avoid FP software assist faults.
	fcvt.xuf.s1 f8 = f14
	fcvt.xuf.s1 f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9		// f10 = y0
	;;
	// 3 Newton-Raphson iterations.
(p6)	fmpy.s1 f12 = f8, f10			// f12 = q0 = a*y0
(p6)	fnma.s1 f11 = f9, f10, f1		// f11 = e  = 1 - b*y0
	;;
(p6)	fma.s1 f12 = f11, f12, f12		// f12 = q1 = q0 + e*q0
(p6)	fmpy.s1 f13 = f11, f11			// f13 = e^2
	;;
(p6)	fma.s1 f10 = f11, f10, f10		// f10 = y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12		// f11 = q2 = q1 + e^2*q1
	;;
	sub in1 = r0, in1			// in1 = -b (integer)
(p6)	fma.s1 f10 = f13, f10, f10		// f10 = y2 = y1 + e^2*y1
(p6)	fnma.s1 f12 = f9, f11, f8		// f12 = r  = a - b*q2 (last FP use of f9 as b)
	;;
	setf.sig f9 = in1			// f9 = integer image of -b
(p6)	fma.s1 f10 = f12, f10, f11		// f10 = q3 = q2 + r*y2
	;;
	// Round quotient to an unsigned integer.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f14
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __umoddi3
#endif

#ifdef L__divsi3
// Compute a 32-bit integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.

	.text
	.align 16
	.global __divsi3
	.proc __divsi3
__divsi3:
	.regstk 2,0,0,0
	// Check divide by zero.
	cmp.ne.unc p0,p7=0,in1
	sxt4 in0 = in0
	sxt4 in1 = in1
	;;
	setf.sig f8 = in0
	setf.sig f9 = in1
(p7)	break 1
	;;
	mov r2 = 0x0ffdd
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
	// NOTE(review): the text available for review stops here; the rest of
	// __divsi3 (including its .endp and the closing #endif) is not shown.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -