📄 ppc_fpu.c
字号:
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN): res->type = a->type; res->e = a->e; break; case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero): res->s = a->s; // fall-thru case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero): res->type = a->type; break; case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN): res->s = b->s; // fall-thru case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero): res->type = b->type; break; case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero): res->type = ppc_fpr_NaN; break; }}// calculate one of these:// + m1 * m2 + s// + m1 * m2 - s// - m1 * m2 + s// - m1 * m2 - s// using a 106 bit accumulator//// .752//// FIXME: There is a bug in this code that shows up in Mac OS X Finder fwd/bwd// button: the top line is not rendered correctly. This works with the jitc_x86// FPU however...inline void ppc_fpu_mul_add(ppc_double *res, ppc_double *m1, ppc_double *m2, ppc_double *s){ ppc_quadro p;/* ht_printf("m1 = %d * %016qx * 2^%d, %s\n", m1.s, m1.m, m1.e, ppc_fpu_get_fpr_type(m1.type)); ht_printf("m2 = %d * %016qx * 2^%d, %s\n", m2.s, m2.m, m2.e, ppc_fpu_get_fpr_type(m2.type));*/ // create product with 106 significant bits ppc_fpu_mul_quadro(&p, m1, m2, 106);/* ht_printf("p = %d * %016qx%016qx * 2^%d, %s\n", p.s, p.m0, p.m1, p.e, ppc_fpu_get_fpr_type(p.type));*/ // convert s into ppc_quadro/* ht_printf("s = %d * %016qx * 2^%d %s\n", s.s, s.m, s.e, ppc_fpu_get_fpr_type(s.type));*/ ppc_quadro q; q.e = s->e; q.s = s->s; q.type = s->type; q.m0 = 0; q.m1 = s->m; // .. with 106 significant bits ppc_fpu_quadro_mshl(&q, 106-56);/* ht_printf("q = %d * %016qx%016qx * 2^%d %s\n", q.s, q.m0, q.m1, q.e, ppc_fpu_get_fpr_type(q.type));*/ // now we must add p, q. ppc_quadro x; ppc_fpu_add_quadro(&x, &p, &q); // x = [107]/* ht_printf("x = %d * %016qx%016qx * 2^%d %s\n", x.s, x.m0, x.m1, x.e, ppc_fpu_get_fpr_type(x.type));*/ res->type = x.type; res->s = x.s; res->e = x.e; if (x.type == ppc_fpr_norm) { res->m = x.m0 << 13; // 43 bits from m0 res->m |= (x.m1 >> (64-12)) << 1; // 12 bits from m1 res->m |= x.m1 & 1; // X' bit from m1 }/* ht_printf("res = %d * %016qx * 2^%d %s\n", res.s, res.m, res.e, ppc_fpu_get_fpr_type(res.type));*/}inline void ppc_fpu_div(ppc_double *res, const ppc_double *a, const ppc_double *b){ res->s = a->s ^ b->s; switch (PPC_FPR_TYPE2(a->type, b->type)) { case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): { res->type = ppc_fpr_norm; res->e = a->e - b->e; res->m = 0; uint64 am = a->m, bm = b->m; uint i = 0; while (am && (i<56)) { res->m <<= 1; if (am >= bm) { res->m |= 1; am -= bm; } am <<= 1;// printf("am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m); i++; } res->m <<= 57-i; if (res->m & (1ULL << 56)) { res->m >>= 1; } else { res->e--; }// printf("final: am=%llx, bm=%llx, rm=%llx\n", am, bm, res.m); break; } case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN): res->e = a->e; // fall-thru case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero): res->s = a->s; // fall-thru case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm): res->type = a->type; break; case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN): res->s = b->s; res->type = b->type; break; case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf): res->type = ppc_fpr_zero; break; case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero): res->type = ppc_fpr_Inf; break; case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero): res->type = ppc_fpr_NaN; break; }}inline void ppc_fpu_sqrt(ppc_double *D, const ppc_double *B){ switch (B->type) { case ppc_fpr_norm: if (B->s) { D->type = ppc_fpr_NaN; gCPU.fpscr |= FPSCR_VXSQRT; break; } // D := 1/2(D_old + B/D_old) *D = *B; D->e /= 2; int i; for (i=0; i<6; i++) { ppc_double D_old = *D; ppc_double B_div_D_old; ppc_fpu_div(&B_div_D_old, B, &D_old); ppc_fpu_add(D, &D_old, &B_div_D_old); D->e--; /* uint64 e; ppc_double E = D; ppc_fpu_pack_double(E, e); printf("%.20f\n", *(double *)&e);*/ } break; case ppc_fpr_zero: D->type = ppc_fpr_zero; D->s = B->s; break; case ppc_fpr_Inf: if (B->s) { D->type = ppc_fpr_NaN; gCPU.fpscr |= FPSCR_VXSQRT; } else { D->type = ppc_fpr_Inf; D->s = 0; } break; case ppc_fpr_NaN: D->type = ppc_fpr_NaN; break; } }void ppc_fpu_test(){ ppc_double A, B, C; double a, b, c; A.type = B.type = ppc_fpr_norm; A.s = 1; A.e = 0; A.m = 0; A.m = ((1ULL<<56)-1)-((1ULL<<10)-1); ht_printf("%qb\n", A.m); B.s = 1; B.e = 0; B.m = 0; B.m = ((1ULL<<56)-1)-((1ULL<<50)-1); a = ppc_fpu_get_double(&A); b = ppc_fpu_get_double(&B); printf("%f + %f = \n", a, b); ppc_fpu_add(&C, &A, &B); uint64 d; uint32 s; ppc_fpu_pack_double_as_single(&C, &d); ht_printf("%064qb\n", d); ppc_fpu_unpack_double(&C, d); ppc_fpu_pack_single(&C, &s); ht_printf("single: %032b\n", s); ppc_single Cs; ppc_fpu_unpack_single(&Cs, s); ppc_fpu_single_to_double(&Cs, &C);// ht_printf("%d\n", ppc_fpu_double_to_int(C)); c = ppc_fpu_get_double(&C); printf("%f\n", c);}/* * a and b must not be NaNs */inline uint32 ppc_fpu_compare(ppc_double *a, ppc_double *b){ if (a->type == ppc_fpr_zero) { if (b->type == ppc_fpr_zero) return 2; return (b->s) ? 4: 8; } if (b->type == ppc_fpr_zero) return (a->s) ? 8: 4; if (a->s != b->s) return (a->s) ? 8: 4; if (a->e > b->e) return (a->s) ? 8: 4; if (a->e < b->e) return (a->s) ? 4: 8; if (a->m > b->m) return (a->s) ? 8: 4; if (a->m < b->m) return (a->s) ? 4: 8; return 2;}double ppc_fpu_get_double_uint(uint64 d){ ppc_double dd; ppc_fpu_unpack_double(&dd, d); return ppc_fpu_get_double(&dd);}double ppc_fpu_get_double(ppc_double *d){ if (d->type == ppc_fpr_norm) { double r = d->m; int i; for (i=0; i<55; i++) { r = r / 2.0; } if (d->e < 0) { int i; for (i=0; i>d->e; i--) { r = r / 2.0; } } else if (d->e > 0) { int i; for (i=0; i<d->e; i++) { r = r * 2.0; } } if (d->s) r = -r; return r; } else { return 0.0; }}/*********************************************************************************** * */ /* * fabsx Floating Absolute Value * .484 */void ppc_opc_fabsx(){ int frD, frA, frB; PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); PPC_OPC_ASSERT(frA==0); gCPU.fpr[frD] = gCPU.fpr[frB] & ~FPU_SIGN_BIT; if (gCPU.current_opc & PPC_OPC_Rc) { // update cr1 flags PPC_FPU_ERR("fabs.\n"); }}/* * faddx Floating Add (Double-Precision) * .485 */void ppc_opc_faddx(){ int frD, frA, frB, frC; PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); PPC_OPC_ASSERT(frC==0); ppc_double A, B, D; ppc_fpu_unpack_double(&A, gCPU.fpr[frA]); ppc_fpu_unpack_double(&B, gCPU.fpr[frB]); if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) { gCPU.fpscr |= FPSCR_VXISI; } ppc_fpu_add(&D, &A, &B); gCPU.fpscr |= ppc_fpu_pack_double(&D, &(gCPU.fpr[frD])); if (gCPU.current_opc & PPC_OPC_Rc) { // update cr1 flags PPC_FPU_ERR("fadd.\n"); }}/* * faddx Floating Add Single * .486 */void ppc_opc_faddsx(){ int frD, frA, frB, frC; PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); PPC_OPC_ASSERT(frC==0); ppc_double A, B, D; ppc_fpu_unpack_double(&A, gCPU.fpr[frA]); ppc_fpu_unpack_double(&B, gCPU.fpr[frB]); if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) { gCPU.fpscr |= FPSCR_VXISI; } ppc_fpu_add(&D, &A, &B); gCPU.fpscr |= ppc_fpu_pack_double_as_single(&D, &(gCPU.fpr[frD])); if (gCPU.current_opc & PPC_OPC_Rc) { // update cr1 flags PPC_FPU_ERR("fadds.\n"); }}/* * fcmpo Floating Compare Ordered * .488 */static uint32 ppc_fpu_cmp_and_mask[8] = { 0xfffffff0, 0xffffff0f, 0xfffff0ff, 0xffff0fff, 0xfff0ffff, 0xff0fffff, 0xf0ffffff, 0x0fffffff,};void ppc_opc_fcmpo(){ int crfD, frA, frB; PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB); crfD >>= 2; ppc_double A, B; ppc_fpu_unpack_double(&A, gCPU.fpr[frA]); ppc_fpu_unpack_double(&B, gCPU.fpr[frB]); uint32 cmp; if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) { gCPU.fpscr |= FPSCR_VXSNAN; /*if (bla)*/ gCPU.fpscr |= FPSCR_VXVC; cmp = 1; } else { cmp = ppc_fpu_compare(&A, &B); } crfD = 7-crfD; gCPU.fpscr &= ~0x1f000; gCPU.fpscr |= (cmp << 12); gCPU.cr &= ppc_fpu_cmp_and_mask[crfD]; gCPU.cr |= (cmp << (crfD * 4));}/* * fcmpu Floating Compare Unordered * .489 */void ppc_opc_fcmpu(){ int crfD, frA, frB; PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB); crfD >>= 2; ppc_double A, B; ppc_fpu_unpack_double(&A, gCPU.fpr[frA]); ppc_fpu_unpack_double(&B, gCPU.fpr[frB]); uint32 cmp; if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) { gCPU.fpscr |= FPSCR_VXSNAN; cmp = 1; } else { cmp = ppc_fpu_compare(&A, &B); } crfD = 7-crfD; gCPU.fpscr &= ~0x1f000; gCPU.fpscr |= (cmp << 12); gCPU.cr &= ppc_fpu_cmp_and_mask[crfD]; gCPU.cr |= (cmp << (crfD * 4));}/* * fctiwx Floating Convert to Integer Word * .492 */void ppc_opc_fctiwx(){ int frD, frA, frB; PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); PPC_OPC_ASSERT(frA==0); ppc_double B; ppc_fpu_unpack_double(&B, gCPU.fpr[frB]); gCPU.fpr[frD] = ppc_fpu_double_to_int(&B); if (gCPU.current_opc & PPC_OPC_Rc) { // update cr1 flags PPC_FPU_ERR("fctiw.\n"); }}/* * fctiwzx Floating Convert to Integer Word with Round toward Zero * .493 */void ppc_opc_fctiwzx(){ int frD, frA, frB; PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); PPC_OPC_ASSERT(frA==0); uint32 oldfpscr = gCPU.fpscr; gCPU.fpscr &= ~3; gCPU.fpscr |= 1; ppc_double B; ppc_fpu_unpack_double(&B, gCPU.fpr[frB]); gCPU.fpr[frD] = ppc_fpu_double_to_int(&B); gCPU.fpscr = oldfpscr; if (gCPU.current_opc & PPC_OPC_Rc) { // update cr1 flags PPC_FPU_ERR("fctiwz.\n"); }}/* * fdivx Floating Divide (Double-Precision) * .494 */void ppc_opc_fdivx(){ int frD, frA, frB, frC; PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); PPC_OPC_ASSERT(frC==0); ppc_double A, B, D; ppc_fpu_unpack_double(&A, gCPU.fpr[frA]); ppc_fpu_unpack_double(&B, gCPU.fpr[frB]); if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) { gCPU.fpscr |= FPSCR_VXZDZ; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -