📄 round.c
字号:
/* Copyright (C) 2006 Dave Nomura dcnltc@us.ibm.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. The GNU General Public License is contained in the file COPYING.*/#include <stdio.h>#include <stdlib.h>#include <limits.h>typedef enum { FALSE=0, TRUE } bool_t;typedef enum { FADDS, FSUBS, FMULS, FDIVS, FMADDS, FMSUBS, FNMADDS, FNMSUBS, FADD, FSUB, FMUL, FDIV, FMADD, FMSUB, FNMADD, FNMSUB, FSQRT} flt_op_t;typedef enum { TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t;char *round_mode_name[] = { "near", "zero", "+inf", "-inf" };const char *flt_op_names[] = { "fadds", "fsubs", "fmuls", "fdivs", "fmadds", "fmsubs", "fnmadds", "fnmsubs", "fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd", "fnmsub", "fsqrt"};typedef unsigned int fpscr_t;typedef union { float flt; struct { unsigned int sign:1; unsigned int exp:8; unsigned int frac:23; } layout;} flt_overlay;typedef union { double dbl; struct { unsigned int sign:1; unsigned int exp:11; unsigned int frac_hi:20; unsigned int frac_lo:32; } layout; struct { unsigned int hi; unsigned int lo; } dbl_pair;} dbl_overlay;void assert_fail(const char *msg, const char* expr, const char* file, int line, const char*fn);#define STRING(__str) #__str#define assert(msg, expr) \ ((void) ((expr) ? 0 : \ (assert_fail (msg, STRING(expr), \ __FILE__, __LINE__, \ __PRETTY_FUNCTION__), 0)))float denorm_small;double dbl_denorm_small;float norm_small;bool_t debug = FALSE;bool_t long_is_64_bits = sizeof(long) == 8;void assert_fail (msg, expr, file, line, fn)const char* msg;const char* expr;const char* file;int line;const char*fn;{ printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n", msg, file, line, fn, expr ); exit( 1 );}void set_rounding_mode(round_mode_t mode){ switch(mode) { case TO_NEAREST: asm volatile("mtfsfi 7, 0"); break; case TO_ZERO: asm volatile("mtfsfi 7, 1"); break; case TO_PLUS_INFINITY: asm volatile("mtfsfi 7, 2"); break; case TO_MINUS_INFINITY: asm volatile("mtfsfi 7, 3"); break; }}void print_double(char *msg, double dbl){ dbl_overlay D; D.dbl = dbl; printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n", msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'), D.layout.exp, D.layout.frac_hi, D.layout.frac_lo);}void print_single(char *msg, float *flt){ flt_overlay F; F.flt = *flt; /* NOTE: for the purposes of comparing the fraction of a single with ** a double left shift the .frac so that hex digits are grouped ** from left to right. this is necessary because the size of a ** single mantissa (23) bits is not a multiple of 4 */ printf("%15s : flt %-20a = %c(%4d, %06x)\n", msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1);}int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected){ int status = 0; flt_overlay R, E; char *result; char *eq_ne; set_rounding_mode(mode); E.flt = *expected; R.flt = (float)dbl; if ((R.layout.sign != E.layout.sign) || (R.layout.exp != E.layout.exp) || (R.layout.frac != E.layout.frac)) { result = "FAILED"; eq_ne = "!="; status = 1; } else { result = "PASSED"; eq_ne = "=="; status = 0; } printf("%s:%s:(double)(%-20a) = %20a", round_mode_name[mode], result, R.flt, dbl); if (status) { print_single("\n\texpected", &E.flt); print_single("\n\trounded ", &R.flt); } putchar('\n'); return status;}int test_dbl_to_float_convert(char *msg, float *base){ int status = 0; double half = (double)denorm_small/2; double qtr = half/2; double D_hi = (double)*base + half + qtr; double D_lo = (double)*base + half - qtr; float F_lo = *base; float F_hi = F_lo + denorm_small; /* ** .....+-----+-----+-----+-----+---.... ** ^F_lo ^ ^ ^ ** D_lo ** D_hi ** F_hi ** F_lo and F_hi are two consecutive single float model numbers ** denorm_small distance apart. D_lo and D_hi are two numbers ** within that range that are not representable as single floats ** and will be rounded to either F_lo or F_hi. */ printf("-------------------------- %s --------------------------\n", msg); if (debug) { print_double("D_lo", D_lo); print_double("D_hi", D_hi); print_single("F_lo", &F_lo); print_single("F_hi", &F_hi); } /* round to nearest */ status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi); status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo); /* round to zero */ status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi)); status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi)); /* round to +inf */ status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi); status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi); /* round to -inf */ status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo); status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo); return status;}voidinit(){ flt_overlay F; dbl_overlay D; /* small is the smallest denormalized single float number */ F.layout.sign = 0; F.layout.exp = 0; F.layout.frac = 1; denorm_small = F.flt; /* == 2^(-149) */ if (debug) { print_double("float small", F.flt); } D.layout.sign = 0; D.layout.exp = 0; D.layout.frac_hi = 0; D.layout.frac_lo = 1; dbl_denorm_small = D.dbl; /* == 2^(-1022) */ if (debug) { print_double("double small", D.dbl); } /* n_small is the smallest normalized single precision float */ F.layout.exp = 1; norm_small = F.flt;}int check_int_to_flt_round(round_mode_t mode, long L, float *expected){ int status = 0; int I = L; char *int_name = "int"; flt_overlay R, E; char *result; int iter; set_rounding_mode(mode); E.flt = *expected; for (iter = 0; iter < 2; iter++) { int stat = 0; R.flt = (iter == 0 ? (float)I : (float)L); if ((R.layout.sign != E.layout.sign) || (R.layout.exp != E.layout.exp) || (R.layout.frac != E.layout.frac)) { result = "FAILED"; stat = 1; } else { result = "PASSED"; stat = 0; } printf("%s:%s:(float)(%4s)%9d = %11.1f", round_mode_name[mode], result, int_name, I, R.flt); if (stat) { print_single("\n\texpected: %.1f ", &E.flt); print_single("\n\trounded ", &R.flt); } putchar('\n'); status |= stat; if (!long_is_64_bits) break; int_name = "long"; } return status;}int check_long_to_dbl_round(round_mode_t mode, long L, double *expected){ int status = 0; dbl_overlay R, E; char *result; set_rounding_mode(mode); E.dbl = *expected; R.dbl = (double)L; if ((R.layout.sign != E.layout.sign) || (R.layout.exp != E.layout.exp) || (R.layout.frac_lo != E.layout.frac_lo) || (R.layout.frac_hi != E.layout.frac_hi)) { result = "FAILED"; status = 1; } else { result = "PASSED"; status = 0; } printf("%s:%s:(double)(%18ld) = %20.1f", round_mode_name[mode], result, L, R.dbl); if (status) { printf("\n\texpected %.1f : ", E.dbl); } putchar('\n'); return status;}int test_int_to_float_convert(char *msg){ int status = 0; int int24_hi = 0x03ff0fff; int int24_lo = 0x03ff0ffd; float pos_flt_lo = 67047420.0; float pos_flt_hi = 67047424.0; float neg_flt_lo = -67047420.0; float neg_flt_hi = -67047424.0; printf("-------------------------- %s --------------------------\n", msg); status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo); status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi); status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo); status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo); status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi); status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi); status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo); status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo); status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo); status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi); status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo); status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo); status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo); status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo); status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi); status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi); return status;}#ifdef __powerpc64__int test_long_to_double_convert(char *msg){ int status = 0; long long55_hi = 0x07ff0ffffffffff; long long55_lo = 0x07ff0fffffffffd; double pos_dbl_lo = 36012304344547324.0; double pos_dbl_hi = 36012304344547328.0; double neg_dbl_lo = -36012304344547324.0; double neg_dbl_hi = -36012304344547328.0; printf("-------------------------- %s --------------------------\n", msg); status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo); status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi); status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo); status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo); status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi); status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi); status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo); status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo); status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo); status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi); status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo); status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo); status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo); status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo); status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi); status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi); return status;}#endifint check_single_arithmetic_op(flt_op_t op){ char *result; int status = 0; dbl_overlay R, E; double qtr, half, fA, fB, fD; round_mode_t mode; int q, s; bool_t two_args = TRUE; float whole = denorm_small;#define BINOP(op) \ __asm__ volatile( \ op" %0, %1, %2\n\t" \ : "=f"(fD) : "f"(fA) , "f"(fB));#define UNOP(op) \ __asm__ volatile( \ op" %0, %1\n\t" \ : "=f"(fD) : "f"(fA)); half = (double)whole/2; qtr = half/2; if (debug) { print_double("qtr", qtr); print_double("whole", whole); print_double("2*whole", 2*whole); } for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++) for (s = -1; s < 2; s += 2) for (q = 1; q < 4; q += 2) { double expected; double lo = s*whole; double hi = s*2*whole; switch(op) { case FADDS: fA = s*whole; fB = s*q*qtr; break; case FSUBS: fA = s*2*whole; fB = s*(q == 1 ? 3 : 1)*qtr; break; case FMULS: fA = 0.5; fB = s*(4+q)*half; break; case FDIVS: fA = s*(4+q)*half; fB = 2.0; break; default: assert("check_single_arithmetic_op: unexpected op", FALSE); break; } switch(mode) { case TO_NEAREST: expected = (q == 1 ? lo : hi); break; case TO_ZERO: expected = lo; break; case TO_PLUS_INFINITY: expected = (s == 1 ? hi : lo); break; case TO_MINUS_INFINITY: expected = (s == 1 ? lo : hi); break; } set_rounding_mode(mode); /* ** do the double precision dual operation just for comparison ** when debugging */ switch(op) { case FADDS: BINOP("fadds"); R.dbl = fD; BINOP("fadd"); break; case FSUBS: BINOP("fsubs"); R.dbl = fD; BINOP("fsub"); break; case FMULS: BINOP("fmuls"); R.dbl = fD; BINOP("fmul"); break; case FDIVS: BINOP("fdivs"); R.dbl = fD; BINOP("fdiv"); break; default: assert("check_single_arithmetic_op: unexpected op", FALSE); break; }#undef UNOP#undef BINOP E.dbl = expected; if ((R.layout.sign != E.layout.sign) || (R.layout.exp != E.layout.exp) || (R.layout.frac_lo != E.layout.frac_lo) || (R.layout.frac_hi != E.layout.frac_hi)) { result = "FAILED"; status = 1; } else { result = "PASSED"; status = 0; } printf("%s:%s:%s(%-13a", round_mode_name[mode], result, flt_op_names[op], fA); if (two_args) printf(", %-13a", fB); printf(") = %-13a", R.dbl); if (status) printf("\n\texpected %a", E.dbl); putchar('\n'); if (debug) { print_double("hi", hi); print_double("lo", lo); print_double("expected", expected); print_double("got", R.dbl); print_double("double result", fD); } } return status;}int check_single_guarded_arithmetic_op(flt_op_t op){ typedef struct { int num, den, frac; } fdivs_t; char *result; int status = 0; flt_overlay A, B, Z; dbl_overlay Res, Exp; double fA, fB, fC, fD; round_mode_t mode; int g, s; int arg_count; fdivs_t divs_guard_cases[16] = { { 105, 56, 0x700000 }, /* : 0 */ { 100, 57, 0x608FB8 }, /* : 1 */ { 000, 00, 0x000000 }, /* : X */ { 100, 52, 0x762762 }, /* : 3 */ { 000, 00, 0x000000 }, /* : X */ { 100, 55, 0x68BA2E }, /* : 5 */ { 000, 00, 0x000000 }, /* : X */ { 100, 51, 0x7AFAFA }, /* : 7 */ { 000, 00, 0x000000 }, /* : X */ { 100, 56, 0x649249 }, /* : 9 */ { 000, 00, 0x000000 }, /* : X */ { 100, 54, 0x6D097B }, /* : B */ { 000, 00, 0x000000 }, /* : X */ { 100, 59, 0x58F2FB }, /* : D */ { 000, 00, 0x000000 }, /* : X */ { 101, 52, 0x789D89 } /* : F */ }; /* 0x1.00000 00000000p-3 */ /* set up the invariant fields of B, the arg to cause rounding */ B.flt = 0.0; B.layout.exp = 124; /* -3 */ /* set up args so result is always Z = 1.200000000000<g>p+0 */ Z.flt = 1.0; Z.layout.sign = 0;#define TERNOP(op) \ arg_count = 3; \ __asm__ volatile( \ op" %0, %1, %2, %3\n\t" \ : "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));#define BINOP(op) \ arg_count = 2; \ __asm__ volatile( \ op" %0, %1, %2\n\t" \ : "=f"(fD) : "f"(fA) , "f"(fB));#define UNOP(op) \ arg_count = 1; \ __asm__ volatile( \ op" %0, %1\n\t" \ : "=f"(fD) : "f"(fA)); for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++) for (s = -1; s < 2; s += 2) for (g = 0; g < 16; g += 1) { double lo, hi, expected; int LSB; int guard = 0; int z_sign = s; /* ** one argument will have exponent = 0 as will the result (by ** design) so choose the other argument with exponent -3 to ** force a 3 bit shift for scaling leaving us with 3 guard bits ** and the LSB bit at the bottom of the manitssa. */ switch(op) { case FADDS: /* 1p+0 + 1.00000<g>p-3 */ B.layout.frac = g; fB = s*B.flt; fA = s*1.0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -