unaligned.c
 * To update f32-f127, there are three choices:
 *
 *      (1) save f32-f127 to thread.fph and update the values there
 *      (2) use a gigantic switch statement to directly access the registers
 *      (3) generate code on the fly to update the desired register
 *
 * For now, we are using approach (1).
 */
        if (regnum >= IA64_FIRST_ROTATING_FR) {
                ia64_sync_fph(current);
                current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
        } else {
                /*
                 * pt_regs or switch_stack ?
                 */
                if (FR_IN_SW(regnum)) {
                        addr = (unsigned long)sw;
                } else {
                        addr = (unsigned long)regs;
                }

                DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));

                addr += FR_OFFS(regnum);
                *(struct ia64_fpreg *)addr = *fpval;

                /*
                 * mark the low partition as being used now
                 *
                 * It is highly unlikely that this bit is not already set, but
                 * let's do it for safety.
                 */
                regs->cr_ipsr |= IA64_PSR_MFL;
        }
}

/*
 * These two inline functions generate the spilled versions of the constant floating point
 * registers which can be used with stfX.
 */
static inline void
float_spill_f0 (struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory");
}

static inline void
float_spill_f1 (struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory");
}

static void
getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
{
        struct switch_stack *sw = (struct switch_stack *) regs - 1;
        unsigned long addr;

        /*
         * From EAS-2.5: FPDisableFault has higher priority than
         * Unaligned Fault. Thus, when we get here, we know the partition is
         * enabled.
         *
         * When regnum > 31, the register is still live and we need to force a save
         * to current->thread.fph to get access to it. See discussion in setfpreg()
         * for reasons and other ways of doing this.
         */
        if (regnum >= IA64_FIRST_ROTATING_FR) {
                ia64_flush_fph(current);
                *fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
        } else {
                /*
                 * f0 = 0.0, f1 = 1.0. These registers are constant and are thus
                 * not saved; we must generate their spilled form on the fly.
                 */
                switch(regnum) {
                case 0:
                        float_spill_f0(fpval);
                        break;
                case 1:
                        float_spill_f1(fpval);
                        break;
                default:
                        /*
                         * pt_regs or switch_stack ?
                         */
                        addr = FR_IN_SW(regnum) ? (unsigned long)sw : (unsigned long)regs;

                        DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
                               FR_IN_SW(regnum), addr, FR_OFFS(regnum));

                        addr += FR_OFFS(regnum);
                        *fpval = *(struct ia64_fpreg *)addr;
                }
        }
}

static void
getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
{
        struct switch_stack *sw = (struct switch_stack *) regs - 1;
        unsigned long addr, *unat;

        if (regnum >= IA64_FIRST_STACKED_GR) {
                get_rse_reg(regs, regnum, val, nat);
                return;
        }

        /*
         * take care of r0 (read-only, always evaluates to 0)
         */
        if (regnum == 0) {
                *val = 0;
                if (nat)
                        *nat = 0;
                return;
        }

        /*
         * Now look at registers in the [0-31] range and init the correct UNAT
         */
        if (GR_IN_SW(regnum)) {
                addr = (unsigned long)sw;
                unat = &sw->ar_unat;
        } else {
                addr = (unsigned long)regs;
                unat = &sw->caller_unat;
        }

        DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));

        addr += GR_OFFS(regnum);

        *val = *(unsigned long *)addr;

        /*
         * do it only when requested
         */
        if (nat)
                *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
}
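/*
 * Illustrative sketch (hypothetical helper, not used by the code above or
 * below): the NaT lookup at the end of getreg() relies on the IA-64 rule
 * that st8.spill records the NaT bit of a spilled general register in the
 * ar.unat bit selected by bits 8:3 of the spill address. The same indexing,
 * written out on its own:
 */
static inline unsigned long
unat_bit_for_spill_addr (unsigned long unat, unsigned long spill_addr)
{
        /* the low six bits of (spill_addr / 8) select one of the 64 collection bits */
        return (unat >> ((spill_addr >> 3) & 0x3f)) & 0x1UL;
}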
static void
emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
{
        /*
         * IMPORTANT:
         * Given the way we handle unaligned speculative loads, we should
         * not get to this point in the code but we keep this sanity check,
         * just in case.
         */
        if (ld.x6_op == 1 || ld.x6_op == 3) {
                printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
                die_if_kernel("unaligned reference on speculative load with register update\n",
                              regs, 30);
        }

        /*
         * at this point, we know that the base register to update is valid, i.e.,
         * it's not r0
         */
        if (type == UPD_IMMEDIATE) {
                unsigned long imm;

                /*
                 * Load +Imm: ldXZ r1=[r3],imm(9)
                 *
                 * form imm9: [13:19] contain the first 7 bits
                 */
                imm = ld.x << 7 | ld.imm;

                /*
                 * sign extend (1+8 bits) if m set
                 */
                if (ld.m)
                        imm |= SIGN_EXT9;

                /*
                 * ifa == r3 and we know that the NaT bit on r3 was clear so
                 * we can directly use ifa.
                 */
                ifa += imm;

                setreg(ld.r3, ifa, 0, regs);

                DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);

        } else if (ld.m) {
                unsigned long r2;
                int nat_r2;

                /*
                 * Load +Reg Opcode: ldXZ r1=[r3],r2
                 *
                 * Note: that we update r3 even in the case of ldfX.a
                 * (where the load does not happen)
                 *
                 * The way the load algorithm works, we know that r3 does not
                 * have its NaT bit set (we would have gotten a NaT consumption
                 * fault before getting the unaligned fault). So we can use ifa,
                 * which equals r3 at this point.
                 *
                 * IMPORTANT:
                 * The above statement holds ONLY because we know that we
                 * never reach this code when trying to do a ldX.s.
                 * If we ever make it to here on an ldfX.s then this
                 * assumption would no longer hold.
                 */
                getreg(ld.imm, &r2, &nat_r2, regs);

                ifa += r2;

                /*
                 * propagate Nat r2 -> r3
                 */
                setreg(ld.r3, ifa, nat_r2, regs);

                DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n", ld.imm, r2, ifa, nat_r2);
        }
}
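/*
 * Illustrative sketch (hypothetical helper, not referenced elsewhere): the
 * imm9 reconstruction performed by emulate_load_updates(), written out on
 * its own. For example, a post-increment of -1 encodes ld.m=1, ld.x=1,
 * ld.imm=0x7f, so (1 << 7 | 0x7f) == 0xff and the sign extension yields -1;
 * ld.m=1, ld.x=0, ld.imm=0 yields -256, the most negative 9-bit displacement.
 */
static inline long
decode_ld_imm9 (load_store_t ld)
{
        unsigned long imm = ld.x << 7 | ld.imm; /* x and imm7 form the low 8 bits */

        if (ld.m)                               /* m is the sign bit of imm9 */
                imm |= SIGN_EXT9;
        return (long) imm;
}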
static int
emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
        unsigned int len = 1 << ld.x6_sz;

        /*
         * r0, as target, doesn't need to be checked because Illegal Instruction
         * faults have higher priority than unaligned faults.
         *
         * r0 cannot be found as the base as it would never generate an
         * unaligned reference.
         */

        /*
         * ldX.a: we don't try to emulate anything but we must invalidate the ALAT entry.
         * See the comment below for an explanation of how we handle ldX.a.
         */
        if (ld.x6_op != 0x2) {
                unsigned long val = 0;

                if (len != 2 && len != 4 && len != 8) {
                        DPRINT("unknown size: x6=%d\n", ld.x6_sz);
                        return -1;
                }
                /* this assumes little-endian byte-order: */
                if (copy_from_user(&val, (void *) ifa, len))
                        return -1;
                setreg(ld.r1, val, 0, regs);
        }

        /*
         * check for updates on any kind of loads
         */
        if (ld.op == 0x5 || ld.m)
                emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE : UPD_REG, ld, regs, ifa);

        /*
         * handling of various loads (based on EAS2.4):
         *
         * ldX.acq (ordered load):
         *      - acquire semantics would have been used, so force a fence instead.
         *
         * ldX.c.clr (check load and clear):
         *      - if we get to this handler, it's because the entry was not in the ALAT.
         *        Therefore the operation reverts to a normal load.
         *
         * ldX.c.nc (check load no clear):
         *      - same as the previous one
         *
         * ldX.c.clr.acq (ordered check load and clear):
         *      - same as above for the c.clr part. The load needs to have acquire semantics,
         *        so we use the fence semantics, which are stronger and thus ensure correctness.
         *
         * ldX.a (advanced load):
         *      - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
         *        address doesn't match the requested size alignment. This means that we would
         *        possibly need more than one load to get the result.
         *
         *        The load part can be handled just like a normal load; however, the difficult
         *        part is to get the right thing into the ALAT. The critical piece of information
         *        in the ALAT is the base address of the load and its size. Clearly any address
         *        can be pushed into the table by executing ld1.a r1=[r3].
         *        Now, if we use the same target register, we will be okay for the chk.a instruction.
         *        If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
         *        which would overlap within [r3,r3+X] (the size of the load was stored in the
         *        ALAT). If such an entry is found, the entry is invalidated. But this is not good
         *        enough. Take the following example:
         *              r3=3
         *              ld4.a r1=[r3]
         *
         *        Could be emulated by doing:
         *              ld1.a r1=[r3],1
         *              store to temporary;
         *              ld1.a r1=[r3],1
         *              store & shift to temporary;
         *              ld1.a r1=[r3],1
         *              store & shift to temporary;
         *              ld1.a r1=[r3]
         *              store & shift to temporary;
         *              r1=temporary
         *
         *        So in this case, you would get the right value in r1 but the wrong info in
         *        the ALAT. Notice that you could do it in reverse to finish with address 3,
         *        but you would still get the size wrong. To get the size right, one needs to
         *        execute exactly the same kind of load. You could do it from an aligned
         *        temporary location, but then you would get the address wrong.
         *
         *        So no matter what, it is not possible to emulate an advanced load
         *        correctly. But is that really critical?
         *
         *        Now one has to look at how ld.a is used: one must either do a ld.c.* or
         *        chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
         *        entry found in the ALAT), and that's perfectly ok because:
         *
         *              - ld.c.*: if the entry is not present, a normal load is executed
         *              - chk.a.*: if the entry is not present, execution jumps to recovery code
         *
         *        In either case, the load can potentially be retried in another form.
         *
         *        So it's okay NOT to do any actual load on an unaligned ld.a. However, the ALAT
         *        must be invalidated for the register (so that chk.a.* and ld.c.* don't pick up
         *        a stale entry later). The register base update MUST also be performed.
         *
         *        Now what should the content of the register and its NaT bit be in the case we
         *        don't do the load? EAS2.4 says (in case an actual load is needed):
         *
         *              - r1 = [r3], Nat = 0 if it succeeds
         *              - r1 = 0,    Nat = 0 if trying to access non-speculative memory
         *
         *        For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
         *        retry and thus eventually reload the register, thereby changing the NaT bit and
         *        register content.
         */

        /*
         * when the load has the .acq completer then
         * use an ordering fence.
         */
        if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
                mb();

        /*
         * invalidate ALAT entry in case of advanced load
         */
        if (ld.x6_op == 0x2)
                invala_gr(ld.r1);

        return 0;
}
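/*
 * Illustrative sketch (hypothetical, user-level helper): why copying 'len'
 * bytes into a zeroed 64-bit word, as emulate_load_int() does via
 * copy_from_user(), yields the zero-extended value on a little-endian
 * machine. Assumes memcpy() is available (e.g. via <linux/string.h>).
 */
static inline unsigned long
load_le_zero_extended (const unsigned char *src, unsigned int len)
{
        unsigned long val = 0;

        /* low-order bytes land in the low-order bits; the remaining bytes stay zero */
        memcpy(&val, src, len);
        return val;
}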
static int
emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
        unsigned long r2;
        unsigned int len = 1 << ld.x6_sz;

        /*
         * if we get to this handler, the NaT bits on both r3 and r2 have already
         * been checked, so we don't need to do it again.
         *
         * extract the value to be stored
         */
        getreg(ld.imm, &r2, 0, regs);

        /*
         * we rely on the macros in unaligned.h for now, i.e.,
         * we let the compiler figure out how to read memory gracefully.
         *
         * We need this switch/case because of the way the inline function
         * works. The code is optimized by the compiler and looks like
         * a single switch/case.
         */
        DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);

        if (len != 2 && len != 4 && len != 8) {
                DPRINT("unknown size: x6=%d\n", ld.x6_sz);
                return -1;
        }

        /* this assumes little-endian byte-order: */
        if (copy_to_user((void *) ifa, &r2, len))
                return -1;

        /*
         * stX [r3]=r2,imm(9)
         *
         * NOTE:
         * ld.r3 can never be r0, because r0 would not generate an
         * unaligned access.
         */
        if (ld.op == 0x5) {
                unsigned long imm;

                /*
                 * form imm9: [12:6] contain the first 7 bits
                 */
                imm = ld.x << 7 | ld.r1;

                /*
                 * sign extend (8 bits) if m set
                 */
                if (ld.m)
                        imm |= SIGN_EXT9;

                /*
                 * ifa == r3 (NaT is necessarily cleared)
                 */
                ifa += imm;

                DPRINT("imm=%lx r3=%lx\n", imm, ifa);

                setreg(ld.r3, ifa, 0, regs);
        }

        /*
         * we don't have alat_invalidate_multiple() so we need
         * to do the complete flush :-<<
         */
        ia64_invala();

        /*
         * stX.rel: use fence instead of release
         */
        if (ld.x6_op == 0xd)
                mb();

        return 0;
}

/*
 * floating point operation sizes in bytes
 */
static const unsigned char float_fsz[4] = {
        10,     /* extended precision (e) */
        8,      /* integer (8) */
        4,      /* single precision (s) */
        8       /* double precision (d) */
};

static inline void
mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static int
emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)