📄 longlong.h
字号:
__asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
(pl) = __m0 * __m1; \
} while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
#else
#define UMUL_TIME 8
#define smul_ppmm(xh, xl, m0, m1) \
__asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
#define SMUL_TIME 4
#define sdiv_qrnnd(q, r, nh, nl, d) \
__asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
#define UDIV_TIME 100
#endif
#endif /* 32-bit POWER architecture variants. */
/* We should test _IBMR2 here when we add assembly support for the system
vendor compilers. */
#if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (bh) && (bh) == 0) \
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
else \
__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
: "=r" (sh), "=&r" (sl) \
: "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (ah) && (ah) == 0) \
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
else if (__builtin_constant_p (bh) && (bh) == 0) \
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
else \
__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
} while (0)
#define count_leading_zeros(count, x) \
__asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 64
#define umul_ppmm(ph, pl, m0, m1) \
do { \
UDItype __m0 = (m0), __m1 = (m1); \
__asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
(pl) = __m0 * __m1; \
} while (0)
#define UMUL_TIME 15
#define smul_ppmm(ph, pl, m0, m1) \
do { \
DItype __m0 = (m0), __m1 = (m1); \
__asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
(pl) = __m0 * __m1; \
} while (0)
#define SMUL_TIME 14 /* ??? */
#define UDIV_TIME 120 /* ??? */
#endif /* 64-bit PowerPC. */
#if defined (__pyr__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("addw %5,%1\n\taddwc %3,%0" \
: "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
: "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
"%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
__asm__ ("subw %5,%1\n\tsubwb %3,%0" \
: "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
: "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
"1" ((USItype)(al)), "g" ((USItype)(bl)))
/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
#define umul_ppmm(w1, w0, u, v) \
({union {UDItype __ll; \
struct {USItype __h, __l;} __i; \
} __x; \
__asm__ ("movw %1,%R0\n\tuemul %2,%0" \
: "=&r" (__x.__ll) \
: "g" ((USItype) (u)), "g" ((USItype)(v))); \
(w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#endif /* __pyr__ */
#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("a %1,%5\n\tae %0,%3" \
: "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
: "%0" ((USItype)(ah)), "r" ((USItype)(bh)), \
"%1" ((USItype)(al)), "r" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
__asm__ ("s %1,%5\n\tse %0,%3" \
: "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
: "0" ((USItype)(ah)), "r" ((USItype)(bh)), \
"1" ((USItype)(al)), "r" ((USItype)(bl)))
#define smul_ppmm(ph, pl, m0, m1) \
__asm__ ( \
"s r2,r2\n" \
" mts r10,%2\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" cas %0,r2,r0\n" \
" mfs r10,%1" \
: "=r" ((USItype)(ph)), "=r" ((USItype)(pl)) \
: "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \
: "r2")
#define UMUL_TIME 20
#define UDIV_TIME 200
#define count_leading_zeros(count, x) \
do { \
if ((x) >= 0x10000) \
__asm__ ("clz %0,%1" \
: "=r" ((USItype)(count)) : "r" ((USItype)(x) >> 16)); \
else \
{ \
__asm__ ("clz %0,%1" \
: "=r" ((USItype)(count)) : "r" ((USItype)(x))); \
(count) += 16; \
} \
} while (0)
#endif /* RT/ROMP */
#if defined (__sh2__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \
: "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
#define UMUL_TIME 5
#endif
#if defined (__sparc__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
: "=r" (sh), "=&r" (sl) \
: "%rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl) \
__CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
__asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
: "=r" (sh), "=&r" (sl) \
: "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \
__CLOBBER_CC)
/* FIXME: When gcc -mcpu=v9 is used on solaris, gcc/config/sol2-sld-64.h
doesn't define anything to indicate that to us, it only sets __sparcv8. */
#if defined (__sparc_v9__) || defined (__sparcv9)
/* Perhaps we should use floating-point operations here? */
#if 0
/* Triggers a bug making mpz/tests/t-gcd.c fail.
Perhaps we simply need explicitly zero-extend the inputs? */
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" : \
"=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1")
#else
/* Use v8 umul until above bug is fixed. */
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
#endif
/* Use a plain v8 divide for v9. */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
USItype __q; \
__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
: "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
(r) = (n0) - __q * (d); \
(q) = __q; \
} while (0)
#else
#if defined (__sparc_v8__) /* gcc normal */ \
|| defined (__sparcv8) /* gcc solaris */
/* Don't match immediate range because, 1) it is not often useful,
2) the 'I' flag thinks of the range as a 13 bit signed interval,
while we want to match a 13 bit interval, sign extended to 32 bits,
but INTERPRETED AS UNSIGNED. */
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
#define UMUL_TIME 5
#if HAVE_HOST_CPU_supersparc
#define UDIV_TIME 60 /* SuperSPARC timing */
#else
/* Don't use this on SuperSPARC because its udiv only handles 53 bit
dividends and will trap to the kernel for the rest. */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
USItype __q; \
__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
: "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
(r) = (n0) - __q * (d); \
(q) = __q; \
} while (0)
#define UDIV_TIME 25
#endif /* HAVE_HOST_CPU_supersparc */
#else /* ! __sparc_v8__ */
#if defined (__sparclite__)
/* This has hardware multiply but not divide. It also has two additional
instructions scan (ffs from high bit) and divscc. */
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
#define UMUL_TIME 5
#define udiv_qrnnd(q, r, n1, n0, d) \
__asm__ ("! Inlined udiv_qrnnd\n" \
" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
" tst %%g0\n" \
" divscc %3,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%0\n" \
" rd %%y,%1\n" \
" bl,a 1f\n" \
" add %1,%4,%1\n" \
"1: ! End of inline udiv_qrnnd" \
: "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \
: "%g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
#define count_leading_zeros(count, x) \
__asm__ ("scan %1,1,%0" : "=r" (count) : "r" (x))
/* Early sparclites return 63 for an argument of 0, but they warn that future
implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
undefined. */
#endif /* __sparclite__ */
#endif /* __sparc_v8__ */
#endif /* __sparc_v9__ */
/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
#ifndef umul_ppmm
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("! Inlined umul_ppmm\n" \
" wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
" sra %3,31,%%g2 ! Don't move this insn\n" \
" and %2,%%g2,%%g2 ! Don't move this insn\n" \
" andcc %%g0,0,%%g1 ! Don't move this insn\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,0,%%g1\n" \
" add %%g1,%%g2,%0\n" \
" rd %%y,%1" \
: "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \
: "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
#endif
#ifndef udiv_qrnnd
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
do { UWtype __r; \
(q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
(r) = __r; \
} while (0)
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
#ifndef UDIV_TIME
#define UDIV_TIME 140
#endif
#endif /* LONGLONG_STANDALONE */
#endif /* udiv_qrnnd */
#endif /* __sparc__ */
#if defined (__sparc__) && W_TYPE_SIZE == 64
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ( \
"addcc %r4,%5,%1\n" \
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -