📄 multi_arith.h
字号:
/*
 * multi_arith.h: multi-precision integer arithmetic functions, needed
 * to do extended-precision floating point.
 *
 * (c) 1998 David Huggins-Daines.
 *
 * Somewhat based on arch/alpha/math-emu/ieee-math.c, which is
 * (c) David Mosberger-Tang.
 *
 * You may copy, modify, and redistribute this file under the terms of
 * the GNU General Public License, version 2, or any later version, at
 * your convenience.
 */

/* Note: These are not general multi-precision math routines.  Rather,
   they implement the subset of integer arithmetic that we need in
   order to multiply, divide, and normalize 128-bit unsigned
   mantissae. */

#ifndef MULTI_ARITH_H
#define MULTI_ARITH_H

#if 0	/* old code... (everything up to the matching #endif is compiled out) */

/* Unsigned only, because we don't need signs to multiply and divide. */
typedef unsigned int int128[4];

/* Word order: MSW128 (index 0) is the most-significant 32-bit word,
   LSW128 (index 3) the least-significant. */
enum {
	MSW128,
	NMSW128,
	NLSW128,
	LSW128
};

/* big-endian: low word of a 64-bit value is at the higher address */
#define LO_WORD(ll)	(((unsigned int *) &ll)[1])
#define HI_WORD(ll)	(((unsigned int *) &ll)[0])

/* Convenience functions to stuff various integer values into int128s */

/* Clear all four words of a. */
extern inline void zero128(int128 a)
{
	a[LSW128] = a[NLSW128] = a[NMSW128] = a[MSW128] = 0;
}

/* Human-readable word order in the arguments: i3 is the most
   significant word, i0 the least. */
extern inline void set128(unsigned int i3, unsigned int i2, unsigned int i1,
			  unsigned int i0, int128 a)
{
	a[LSW128] = i0;
	a[NLSW128] = i1;
	a[NMSW128] = i2;
	a[MSW128] = i3;
}

/* Convenience functions (for testing as well) */

/* Widen a 64-bit value into the low half of dest; upper words cleared. */
extern inline void int64_to_128(unsigned long long src, int128 dest)
{
	dest[LSW128] = (unsigned int) src;
	dest[NLSW128] = src >> 32;
	dest[NMSW128] = dest[MSW128] = 0;
}

/* Truncate src to its low 64 bits (upper two words are discarded). */
extern inline void int128_to_64(const int128 src, unsigned long long *dest)
{
	*dest = src[LSW128] | (long long) src[NLSW128] << 32;
}

/* Debug dump, most-significant word first. */
extern inline void put_i128(const int128 a)
{
	printk("%08x %08x %08x %08x\n", a[MSW128], a[NMSW128],
	       a[NLSW128], a[LSW128]);
}

/* Internal shifters:

   Note that these are only good for 0 < count < 32.
*/

/* Shift a left by count bits (0 < count < 32 only: a "32 - count"
   shift of 32 would be undefined). */
extern inline void _lsl128(unsigned int count, int128 a)
{
	a[MSW128] = (a[MSW128] << count) | (a[NMSW128] >> (32 - count));
	a[NMSW128] = (a[NMSW128] << count) | (a[NLSW128] >> (32 - count));
	a[NLSW128] = (a[NLSW128] << count) | (a[LSW128] >> (32 - count));
	a[LSW128] <<= count;
}

/* Shift a right by count bits (0 < count < 32 only). */
extern inline void _lsr128(unsigned int count, int128 a)
{
	a[LSW128] = (a[LSW128] >> count) | (a[NLSW128] << (32 - count));
	a[NLSW128] = (a[NLSW128] >> count) | (a[NMSW128] << (32 - count));
	a[NMSW128] = (a[NMSW128] >> count) | (a[MSW128] << (32 - count));
	a[MSW128] >>= count;
}

/* Should be faster, one would hope */

/* Shift a left one bit: lsl.l on the low word, then roxl.l rotates
   the X (extend) flag into each successively higher word.  The "0".."3"
   input constraints tie each input to the matching output register. */
extern inline void lslone128(int128 a)
{
	asm volatile ("lsl.l #1,%0\n"
		      "roxl.l #1,%1\n"
		      "roxl.l #1,%2\n"
		      "roxl.l #1,%3\n"
		      : "=d" (a[LSW128]), "=d" (a[NLSW128]),
			"=d" (a[NMSW128]), "=d" (a[MSW128])
		      : "0" (a[LSW128]), "1" (a[NLSW128]),
			"2" (a[NMSW128]), "3" (a[MSW128]));
}

/* Shift a right one bit: lsr.l on the high word, then roxr.l carries
   the X flag down through the lower words. */
extern inline void lsrone128(int128 a)
{
	asm volatile ("lsr.l #1,%0\n"
		      "roxr.l #1,%1\n"
		      "roxr.l #1,%2\n"
		      "roxr.l #1,%3\n"
		      : "=d" (a[MSW128]), "=d" (a[NMSW128]),
			"=d" (a[NLSW128]), "=d" (a[LSW128])
		      : "0" (a[MSW128]), "1" (a[NMSW128]),
			"2" (a[NLSW128]), "3" (a[LSW128]));
}

/* Generalized 128-bit shifters:

   These bit-shift to a multiple of 32, then move whole longwords.
*/

/* Shift a left by an arbitrary count: sub-word shift first, then move
   whole 32-bit words down toward MSW128. */
extern inline void lsl128(unsigned int count, int128 a)
{
	int wordcount, i;

	if (count % 32)
		_lsl128(count % 32, a);

	if (0 == (wordcount = count / 32))
		return;

	/* argh, gak, endian-sensitive */
	for (i = 0; i < 4 - wordcount; i++) {
		a[i] = a[i + wordcount];
	}
	for (i = 3; i >= 4 - wordcount; --i) {
		a[i] = 0;
	}
}

/* Shift a right by an arbitrary count: sub-word shift first, then move
   whole 32-bit words up toward LSW128, zero-filling from the top. */
extern inline void lsr128(unsigned int count, int128 a)
{
	int wordcount, i;

	if (count % 32)
		_lsr128(count % 32, a);

	if (0 == (wordcount = count / 32))
		return;

	for (i = 3; i >= wordcount; --i) {
		a[i] = a[i - wordcount];
	}
	for (i = 0; i < wordcount; i++) {
		a[i] = 0;
	}
}

/* OR a into the low word of b.
   NOTE(review): declared to return int but has no return statement —
   using its result would be undefined.  Dead code under #if 0. */
extern inline int orl128(int a, int128 b)
{
	b[LSW128] |= a;
}

/* Nonzero iff the most-significant bit of a is set. */
extern inline int btsthi128(const int128 a)
{
	return a[MSW128] & 0x80000000;
}

/* test bits (numbered from 0 = LSB) up to and including "top" */
/* NOTE(review): for top % 32 == 31 this computes 1 << 32 on a 32-bit
   int, which is undefined behavior — TODO confirm callers never pass
   such a top.  Dead code under #if 0. */
extern inline int bftestlo128(int top, const int128 a)
{
	int r = 0;

	if (top > 31)
		r |= a[LSW128];
	if (top > 63)
		r |= a[NLSW128];
	if (top > 95)
		r |= a[NMSW128];

	r |= a[3 - (top / 32)] & ((1 << (top % 32 + 1)) - 1);

	return (r != 0);
}

/* Aargh.  We need these because GCC is broken */
/* FIXME: do them in assembly, for goodness' sake! */

/* Build a mask of the low "pos" bits of a 64-bit value. */
extern inline void mask64(int pos, unsigned long long *mask)
{
	*mask = 0;

	if (pos < 32) {
		LO_WORD(*mask) = (1 << pos) - 1;
		return;
	}
	LO_WORD(*mask) = -1;
	HI_WORD(*mask) = (1 << (pos - 32)) - 1;
}

/* Set bit "pos" (0 = LSB) of *dest. */
extern inline void bset64(int pos, unsigned long long *dest)
{
	/* This conditional will be optimized away.  Thanks, GCC!
*/
	/* NOTE(review): "=m" marks a write-only operand, but bset is a
	   read-modify-write instruction — "+m" would be the correct
	   constraint.  Dead code under #if 0. */
	if (pos < 32)
		asm volatile ("bset %1,%0" : "=m" (LO_WORD(*dest))
			      : "id" (pos));
	else
		asm volatile ("bset %1,%0" : "=m" (HI_WORD(*dest))
			      : "id" (pos - 32));
}

/* Nonzero iff bit "pos" (0 = LSB) of dest is set. */
extern inline int btst64(int pos, unsigned long long dest)
{
	if (pos < 32)
		return (0 != (LO_WORD(dest) & (1 << pos)));
	else
		return (0 != (HI_WORD(dest) & (1 << (pos - 32))));
}

/* 64-bit left shift.
   NOTE(review): the carry term looks wrong — compare lsr64 below;
   this should presumably be LO_WORD(*dest) >> (32 - count).
   Dead code under #if 0, so it was never exercised. */
extern inline void lsl64(int count, unsigned long long *dest)
{
	if (count < 32) {
		HI_WORD(*dest) = (HI_WORD(*dest) << count)
				 | (LO_WORD(*dest) >> count);
		LO_WORD(*dest) <<= count;
		return;
	}
	count -= 32;
	HI_WORD(*dest) = LO_WORD(*dest) << count;
	LO_WORD(*dest) = 0;
}

/* 64-bit right shift (count == 0 would shift by 32 below — assumes
   0 < count, as with the other internal shifters). */
extern inline void lsr64(int count, unsigned long long *dest)
{
	if (count < 32) {
		LO_WORD(*dest) = (LO_WORD(*dest) >> count)
				 | (HI_WORD(*dest) << (32 - count));
		HI_WORD(*dest) >>= count;
		return;
	}
	count -= 32;
	LO_WORD(*dest) = HI_WORD(*dest) >> count;
	HI_WORD(*dest) = 0;
}

#endif

/* Shift the mantissa of reg right by cnt bits and raise the exponent
   to match.  Bits shifted out of the bottom land in reg->lowmant; the
   "|= 1" lines fold any bits lost beyond lowmant into its LSB as a
   sticky bit (for correct rounding later). */
extern inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
{
	reg->exp += cnt;

	switch (cnt) {
	case 0 ... 8:
		reg->lowmant = reg->mant.m32[1] << (8 - cnt);
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
				   (reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 9 ... 32:
		reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
				   (reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 33 ... 39:
		/* bfextu pulls the 8 lowmant bits straight out of the
		   high mantissa word as a bit field. */
		asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
			      : "m" (reg->mant.m32[0]), "d" (64 - cnt));
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	case 40 ...
71:
		reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
		if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	default:
		/* Shift count >= 72: everything is shifted out; keep
		   only a sticky bit if the mantissa was nonzero. */
		reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
		reg->mant.m32[0] = 0;
		reg->mant.m32[1] = 0;
		break;
	}
}

/* Normalize a mantissa whose MSB is not set: find the first set bit
   with bfffo (bit-field find first one), shift the mantissa left so
   that bit becomes the MSB, and return the shift count applied. */
extern inline int fp_overnormalize(struct fp_ext *reg)
{
	int shift;

	if (reg->mant.m32[0]) {
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift)
		     : "dm" (reg->mant.m32[0]));
		reg->mant.m32[0] = (reg->mant.m32[0] << shift) |
				   (reg->mant.m32[1] >> (32 - shift));
		reg->mant.m32[1] = (reg->mant.m32[1] << shift);
	} else {
		/* First set bit is somewhere in the low word. */
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift)
		     : "dm" (reg->mant.m32[1]));
		reg->mant.m32[0] = (reg->mant.m32[1] << shift);
		reg->mant.m32[1] = 0;
		shift += 32;
	}

	return shift;
}

/* dest += src over the 72-bit mantissa (lowmant + 64-bit mantissa):
   add.b on the low byte, then addx.l chains propagate the X (extend)
   flag upward; the final addx.l of 0 to 0 captures the carry out of
   the top word.  The asm statements must stay back to back so the X
   flag survives between them. */
extern inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
{
	int carry;

	/* we assume here, gcc only insert move and a clr instr */
	asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
		      : "g,d" (src->lowmant), "0,0" (dest->lowmant));
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
		      : "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]));
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
		      : "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
	asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));

	return carry;
}

/* Handle a carry out of fp_addmant: bump the exponent and shift the
   mantissa right one bit (reinserting the carry as the new MSB).
   Returns 0 when the exponent saturates at 0x7fff — the value
   overflowed (OVFL, plus INEX2 if nonzero mantissa bits were lost) —
   and 1 otherwise. */
extern inline int fp_addcarry(struct fp_ext *reg)
{
	if (++reg->exp == 0x7fff) {
		if (reg->mant.m64)
			fp_set_sr(FPSR_EXC_INEX2);
		reg->mant.m64 = 0;
		fp_set_sr(FPSR_EXC_OVFL);
		return 0;
	}
	/* lowmant's old contents collapse into a sticky bit. */
	reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ?
1 : 0);
	reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
			   (reg->mant.m32[0] << 31);
	reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;

	return 1;
}

/* dest = src1 - src2 over the 72-bit mantissa: sub.b on the low byte,
   then subx.l chains propagate the borrow (X flag) upward.  As with
   fp_addmant, the asm statements must stay adjacent so the X flag is
   not clobbered in between. */
extern inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
			      struct fp_ext *src2)
{
	/* we assume here, gcc only insert move and a clr instr */
	asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
		      : "g,d" (src2->lowmant), "0,0" (src1->lowmant));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
		      : "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
		      : "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
}

/* 32x32 -> 64 unsigned multiply: mulu.l leaves the high longword in
   desth and the low longword in destl. */
#define fp_mul64(desth, destl, src1, src2) ({				\
	asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth)		\
		: "g" (src1), "0" (src2));				\
})

/* 64/32 unsigned divide: srch:srcl / div -> quotient and remainder. */
#define fp_div64(quot, rem, srch, srcl, div)				\
	asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem)		\
		: "dm" (div), "1" (srch), "0" (srcl))

/* 64-bit add of the word pairs (dest1:dest2) += (src1:src2), carrying
   from the low add into the high addx. */
#define fp_add64(dest1, dest2, src1, src2) ({				\
	asm ("add.l %1,%0" : "=d,=dm" (dest2)				\
		: "dm,d" (src2), "0,0" (dest2));			\
	asm ("addx.l %1,%0" : "=d" (dest1)				\
		: "d" (src1), "0" (dest1));				\
})

/* Add a 64-bit value into the low 64 bits of a 96-bit accumulator,
   propagating the carry into dest->m32[0].
   NOTE(review): the expansion reads a variable named "temp", not the
   "src" parameter — it only works when invoked with a local called
   temp (as fp_multiplymant below does). */
#define fp_addx96(dest, src) ({					\
	/* we assume here, gcc only insert move and a clr instr */ \
	asm volatile ("add.l %1,%0" : "=d,g" (dest->m32[2])	\
		: "g,d" (temp.m32[1]), "0,0" (dest->m32[2]));	\
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[1])	\
		: "d" (temp.m32[0]), "0" (dest->m32[1]));	\
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[0])	\
		: "d" (0), "0" (dest->m32[0]));			\
})

/* 64-bit subtract dest -= src over the m32[0]:m32[1] word pairs. */
#define fp_sub64(dest, src) ({					\
	asm ("sub.l %1,%0" : "=d,=dm" (dest.m32[1])		\
		: "dm,d" (src.m32[1]), "0,0" (dest.m32[1]));	\
	asm ("subx.l %1,%0" : "=d" (dest.m32[0])		\
		: "d" (src.m32[0]), "0" (dest.m32[0]));		\
})

/* 96-bit subtract dest -= srch:srcm:srcl; evaluates to a nonzero char
   when the subtraction borrowed (scs sets the byte on carry/borrow). */
#define fp_sub96c(dest, srch, srcm, srcl) ({			\
	char carry;						\
	asm ("sub.l %1,%0" : "=d,=dm" (dest.m32[2])		\
		: "dm,d" (srcl), "0,0" (dest.m32[2]));		\
	asm ("subx.l %1,%0" : "=d" (dest.m32[1])		\
		: "d" (srcm), "0" (dest.m32[1]));		\
	asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0]) \
		: "d" (srch), "1" (dest.m32[0]));		\
	carry;							\
})

/* Multiply the mantissae of src1 and src2 into the 128-bit dest.
   NOTE(review): the remainder of this function is not present in this
   chunk of the file — it is cut off after the declaration below. */
extern inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
				   struct fp_ext *src2)
{
	union fp_mant64 temp;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -