unaligned.c
		CASE_FIXED_FP(69);  CASE_FIXED_FP(70);  CASE_FIXED_FP(71);  CASE_FIXED_FP(72);
		CASE_FIXED_FP(73);  CASE_FIXED_FP(74);  CASE_FIXED_FP(75);  CASE_FIXED_FP(76);
		CASE_FIXED_FP(77);  CASE_FIXED_FP(78);  CASE_FIXED_FP(79);  CASE_FIXED_FP(80);
		CASE_FIXED_FP(81);  CASE_FIXED_FP(82);  CASE_FIXED_FP(83);  CASE_FIXED_FP(84);
		CASE_FIXED_FP(85);  CASE_FIXED_FP(86);  CASE_FIXED_FP(87);  CASE_FIXED_FP(88);
		CASE_FIXED_FP(89);  CASE_FIXED_FP(90);  CASE_FIXED_FP(91);  CASE_FIXED_FP(92);
		CASE_FIXED_FP(93);  CASE_FIXED_FP(94);  CASE_FIXED_FP(95);  CASE_FIXED_FP(96);
		CASE_FIXED_FP(97);  CASE_FIXED_FP(98);  CASE_FIXED_FP(99);  CASE_FIXED_FP(100);
		CASE_FIXED_FP(101); CASE_FIXED_FP(102); CASE_FIXED_FP(103); CASE_FIXED_FP(104);
		CASE_FIXED_FP(105); CASE_FIXED_FP(106); CASE_FIXED_FP(107); CASE_FIXED_FP(108);
		CASE_FIXED_FP(109); CASE_FIXED_FP(110); CASE_FIXED_FP(111); CASE_FIXED_FP(112);
		CASE_FIXED_FP(113); CASE_FIXED_FP(114); CASE_FIXED_FP(115); CASE_FIXED_FP(116);
		CASE_FIXED_FP(117); CASE_FIXED_FP(118); CASE_FIXED_FP(119); CASE_FIXED_FP(120);
		CASE_FIXED_FP(121); CASE_FIXED_FP(122); CASE_FIXED_FP(123); CASE_FIXED_FP(124);
		CASE_FIXED_FP(125); CASE_FIXED_FP(126); CASE_FIXED_FP(127);
	}
#undef CASE_FIXED_FP
#undef CASE_RESTORED_FP
}
#endif /* XEN */

#ifdef XEN
void
#else
static void
#endif
getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
{
	struct switch_stack *sw = (struct switch_stack *) regs - 1;
	unsigned long addr, *unat;

	if (regnum >= IA64_FIRST_STACKED_GR) {
		get_rse_reg(regs, regnum, val, nat);
		return;
	}

	/*
	 * take care of r0 (read-only always evaluate to 0)
	 */
	if (regnum == 0) {
		*val = 0;
		if (nat)
			*nat = 0;
		return;
	}

	/*
	 * Now look at registers in [0-31] range and init correct UNAT
	 */
	if (GR_IN_SW(regnum)) {
		addr = (unsigned long)sw;
		unat = &sw->ar_unat;
	} else {
		addr = (unsigned long)regs;
#if defined(XEN)
		unat = &regs->eml_unat;
#else
		unat = &sw->caller_unat;
#endif
	}

	DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));

	addr += GR_OFFS(regnum);

	*val = *(unsigned long *)addr;

	/*
	 * do it only when requested
	 */
	if (nat)
		*nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
}
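
/*
 * Editorial sketch (not part of the original file): getreg() above pulls the
 * NaT bit out of the relevant UNAT word by using the spill address itself as
 * the bit index.  Spill slots are 8 bytes, so bits 3..8 of the address select
 * one of 64 UNAT bits.  The helper below merely restates that expression in
 * isolation; its name is made up for illustration and nothing calls it.
 */
static inline unsigned long
example_unat_bit (unsigned long unat_word, unsigned long spill_addr)
{
	/* (spill_addr >> 3) & 0x3f == spill slot number within a 512-byte window */
	return (unat_word >> ((spill_addr >> 3) & 0x3f)) & 0x1UL;
}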

#ifndef XEN
static void
emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
{
	/*
	 * IMPORTANT:
	 * Given the way we handle unaligned speculative loads, we should
	 * not get to this point in the code but we keep this sanity check,
	 * just in case.
	 */
	if (ld.x6_op == 1 || ld.x6_op == 3) {
		printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
		die_if_kernel("unaligned reference on speculative load with register update\n",
			      regs, 30);
	}

	/*
	 * at this point, we know that the base register to update is valid i.e.,
	 * it's not r0
	 */
	if (type == UPD_IMMEDIATE) {
		unsigned long imm;

		/*
		 * Load +Imm: ldXZ r1=[r3],imm(9)
		 *
		 * form imm9: [13:19] contain the first 7 bits
		 */
		imm = ld.x << 7 | ld.imm;

		/*
		 * sign extend (1+8bits) if m set
		 */
		if (ld.m)
			imm |= SIGN_EXT9;

		/*
		 * ifa == r3 and we know that the NaT bit on r3 was clear so
		 * we can directly use ifa.
		 */
		ifa += imm;

		setreg(ld.r3, ifa, 0, regs);

		DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);

	} else if (ld.m) {
		unsigned long r2;
		int nat_r2;

		/*
		 * Load +Reg Opcode: ldXZ r1=[r3],r2
		 *
		 * Note: that we update r3 even in the case of ldfX.a
		 * (where the load does not happen)
		 *
		 * The way the load algorithm works, we know that r3 does not
		 * have its NaT bit set (would have gotten NaT consumption
		 * before getting the unaligned fault). So we can use ifa
		 * which equals r3 at this point.
		 *
		 * IMPORTANT:
		 * The above statement holds ONLY because we know that we
		 * never reach this code when trying to do a ldX.s.
		 * If we ever made it to here on an ldfX.s, the NaT
		 * assumption above would no longer hold.
		 */
		getreg(ld.imm, &r2, &nat_r2, regs);

		ifa += r2;

		/*
		 * propagate Nat r2 -> r3
		 */
		setreg(ld.r3, ifa, nat_r2, regs);

		DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n", ld.imm, r2, ifa, nat_r2);
	}
}
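
/*
 * Editorial sketch (not in the original file): how the 9-bit immediate of the
 * "ldXZ r1=[r3],imm(9)" update form is reassembled from the instruction
 * fields, exactly as emulate_load_updates() does above.  ld.x supplies bit 7,
 * ld.imm supplies bits 0-6, and ld.m acts as the sign bit; SIGN_EXT9 is the
 * existing mask that fills bits 8 and up.  The helper name is made up for
 * illustration and nothing calls it.
 */
static inline unsigned long
example_form_imm9 (load_store_t ld)
{
	unsigned long imm = ld.x << 7 | ld.imm;	/* low 8 bits */

	if (ld.m)				/* negative: extend the sign */
		imm |= SIGN_EXT9;
	return imm;
}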

static int
emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
	unsigned int len = 1 << ld.x6_sz;
	unsigned long val = 0;

	/*
	 * r0, as target, doesn't need to be checked because Illegal Instruction
	 * faults have higher priority than unaligned faults.
	 *
	 * r0 cannot be found as the base as it would never generate an
	 * unaligned reference.
	 */

	/*
	 * ldX.a we will emulate load and also invalidate the ALAT entry.
	 * See comment below for explanation on how we handle ldX.a
	 */
	if (len != 2 && len != 4 && len != 8) {
		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
		return -1;
	}

	/* this assumes little-endian byte-order: */
	if (copy_from_user(&val, (void __user *) ifa, len))
		return -1;

	setreg(ld.r1, val, 0, regs);

	/*
	 * check for updates on any kind of loads
	 */
	if (ld.op == 0x5 || ld.m)
		emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE : UPD_REG, ld, regs, ifa);

	/*
	 * handling of various loads (based on EAS2.4):
	 *
	 * ldX.acq (ordered load):
	 *	- acquire semantics would have been used, so force fence instead.
	 *
	 * ldX.c.clr (check load and clear):
	 *	- if we get to this handler, it's because the entry was not in the ALAT.
	 *	  Therefore the operation reverts to a normal load
	 *
	 * ldX.c.nc (check load no clear):
	 *	- same as previous one
	 *
	 * ldX.c.clr.acq (ordered check load and clear):
	 *	- same as above for the c.clr part. The load needs to have acquire semantics,
	 *	  so we use the fence semantics which is stronger and thus ensures correctness.
	 *
	 * ldX.a (advanced load):
	 *	- suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
	 *	  address doesn't match the requested size alignment. This means that we would
	 *	  possibly need more than one load to get the result.
	 *
	 *	  The load part can be handled just like a normal load, however the difficult
	 *	  part is to get the right thing into the ALAT. The critical pieces of information
	 *	  are the base address of the load & its size. To do that, a ld.a must be executed;
	 *	  clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
	 *	  if we use the same target register, we will be okay for the chk.a instruction.
	 *	  If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
	 *	  which would overlap within [r3,r3+X] (the size of the load was stored in the
	 *	  ALAT). If such an entry is found, the entry is invalidated. But this is not good
	 *	  enough, take the following example:
	 *		r3=3
	 *		ld4.a r1=[r3]
	 *
	 *	  Could be emulated by doing:
	 *		ld1.a r1=[r3],1
	 *		store to temporary;
	 *		ld1.a r1=[r3],1
	 *		store & shift to temporary;
	 *		ld1.a r1=[r3],1
	 *		store & shift to temporary;
	 *		ld1.a r1=[r3]
	 *		store & shift to temporary;
	 *		r1=temporary
	 *
	 *	  So in this case, you would get the right value in r1 but the wrong info in
	 *	  the ALAT. Notice that you could do it in reverse to finish with address 3
	 *	  but you would still get the size wrong. To get the size right, one needs to
	 *	  execute exactly the same kind of load. You could do it from an aligned
	 *	  temporary location, but you would get the address wrong.
	 *
	 *	  So no matter what, it is not possible to emulate an advanced load
	 *	  correctly. But is that really critical ?
	 *
	 *	  We will always convert ld.a into a normal load with the ALAT invalidated. This
	 *	  will enable the compiler to do optimizations where certain code paths after ld.a
	 *	  are not required to have ld.c/chk.a, e.g., code paths with no intervening stores.
	 *
	 *	  If there is a store after the advanced load, one must either do a ld.c.* or
	 *	  chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
	 *	  entry found in the ALAT), and that's perfectly ok because:
	 *
	 *		- ld.c.*, if the entry is not present, a normal load is executed
	 *		- chk.a.*, if the entry is not present, execution jumps to recovery code
	 *
	 *	  In either case, the load can be potentially retried in another form.
	 *
	 *	  The ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
	 *	  up a stale entry later). The register base update MUST also be performed.
	 */

	/*
	 * when the load has the .acq completer then
	 * use ordering fence.
	 */
	if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
		mb();

	/*
	 * invalidate ALAT entry in case of advanced load
	 */
	if (ld.x6_op == 0x2)
		invala_gr(ld.r1);

	return 0;
}
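
/*
 * Editorial sketch (not in the original file): for the integer loads above,
 * copy_from_user() is what actually performs the byte-wise access; on a
 * little-endian configuration the net effect is equivalent to gathering the
 * bytes at the faulting address LSB-first, which the purely illustrative
 * helper below spells out.  The name is made up and nothing calls it.
 */
static inline unsigned long
example_gather_le (const unsigned char *src, unsigned int len)
{
	unsigned long val = 0;
	unsigned int i;

	for (i = 0; i < len; i++)
		val |= (unsigned long) src[i] << (8 * i);
	return val;
}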

static int
emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
	unsigned long r2;
	unsigned int len = 1 << ld.x6_sz;

	/*
	 * if we get to this handler, Nat bits on both r3 and r2 have already
	 * been checked. so we don't need to do it
	 *
	 * extract the value to be stored
	 */
	getreg(ld.imm, &r2, NULL, regs);

	/*
	 * we rely on the macros in unaligned.h for now i.e.,
	 * we let the compiler figure out how to read memory gracefully.
	 *
	 * We need this switch/case because of the way the inline function
	 * works. The code is optimized by the compiler and looks like
	 * a single switch/case.
	 */
	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);

	if (len != 2 && len != 4 && len != 8) {
		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
		return -1;
	}

	/* this assumes little-endian byte-order: */
	if (copy_to_user((void __user *) ifa, &r2, len))
		return -1;

	/*
	 * stX [r3]=r2,imm(9)
	 *
	 * NOTE:
	 * ld.r3 can never be r0, because r0 would not generate an
	 * unaligned access.
	 */
	if (ld.op == 0x5) {
		unsigned long imm;

		/*
		 * form imm9: [12:6] contain first 7bits
		 */
		imm = ld.x << 7 | ld.r1;

		/*
		 * sign extend (8bits) if m set
		 */
		if (ld.m)
			imm |= SIGN_EXT9;

		/*
		 * ifa == r3 (NaT is necessarily cleared)
		 */
		ifa += imm;

		DPRINT("imm=%lx r3=%lx\n", imm, ifa);

		setreg(ld.r3, ifa, 0, regs);
	}

	/*
	 * we don't have alat_invalidate_multiple() so we need
	 * to do the complete flush :-<<
	 */
	ia64_invala();

	/*
	 * stX.rel: use fence instead of release
	 */
	if (ld.x6_op == 0xd)
		mb();

	return 0;
}
#endif /* XEN */

/*
 * floating point operations sizes in bytes
 */
static const unsigned char float_fsz[4]={
	10, /* extended precision (e) */
	8,  /* integer (8)            */
	4,  /* single precision (s)   */
	8   /* double precision (d)   */
};

static inline void
mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfe(6, init);
	ia64_stop();
	ia64_stf_spill(final, 6);
}

static inline void
mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf8(6, init);
	ia64_stop();
	ia64_stf_spill(final, 6);
}

static inline void
mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfs(6, init);
	ia64_stop();
	ia64_stf_spill(final, 6);
}

static inline void
mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfd(6, init);
	ia64_stop();
	ia64_stf_spill(final, 6);
}

static inline void
float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();
	ia64_stfe(final, 6);
}

static inline void
float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();
	ia64_stf8(final, 6);
}

static inline void
float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();
	ia64_stfs(final, 6);
}

static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();
	ia64_stfd(final, 6);
}

#ifndef XEN
static int
emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
	struct ia64_fpreg fpr_init[2];
	struct ia64_fpreg fpr_final[2];
	unsigned long len = float_fsz[ld.x6_sz];

	/*
	 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
	 * higher priority than unaligned faults.
	 *
	 * r0 cannot be found as the base as it would never generate an unaligned
	 * reference.
	 */

	/*
	 * make sure we get clean buffers
	 */
	memset(&fpr_init, 0, sizeof(fpr_init));
	memset(&fpr_final, 0, sizeof(fpr_final));

	/*
	 * ldfpX.a: we don't try to emulate anything but we must
	 * invalidate the ALAT entry and execute updates, if any.
	 */