/* unaligned.c */
	} else {
		/*
		 * f0 = 0.0, f1 = 1.0. Those registers are constant and are thus
		 * not saved, we must generate their spilled form on the fly
		 */
		switch (regnum) {
		case 0:
			float_spill_f0(fpval);
			break;
		case 1:
			float_spill_f1(fpval);
			break;
		default:
			/*
			 * pt_regs or switch_stack ?
			 */
			addr = FR_IN_SW(regnum) ? (unsigned long)sw : (unsigned long)regs;

			DPRINT(("is_sw=%d tmp_base=%lx offset=0x%x\n",
				FR_IN_SW(regnum), addr, FR_OFFS(regnum)));

			addr += FR_OFFS(regnum);

			*fpval = *(struct ia64_fpreg *)addr;
		}
	}
}

static void
getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
{
	struct switch_stack *sw = (struct switch_stack *)regs - 1;
	unsigned long addr, *unat;

	if (regnum >= IA64_FIRST_STACKED_GR) {
		get_rse_reg(regs, regnum, val, nat);
		return;
	}

	/*
	 * take care of r0 (read-only, always evaluates to 0)
	 */
	if (regnum == 0) {
		*val = 0;
		if (nat)
			*nat = 0;
		return;
	}

	/*
	 * Now look at registers in [0-31] range and init correct UNAT
	 */
	if (GR_IN_SW(regnum)) {
		addr = (unsigned long)sw;
		unat = &sw->ar_unat;
	} else {
		addr = (unsigned long)regs;
		unat = &sw->caller_unat;
	}

	DPRINT(("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum)));

	addr += GR_OFFS(regnum);

	*val = *(unsigned long *)addr;

	/*
	 * do it only when requested
	 */
	if (nat)
		*nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
}

static void
emulate_load_updates(update_t type, load_store_t *ld, struct pt_regs *regs, unsigned long ifa)
{
	/*
	 * IMPORTANT:
	 * Given the way we handle unaligned speculative loads, we should
	 * not get to this point in the code but we keep this sanity check,
	 * just in case.
	 */
	if (ld->x6_op == 1 || ld->x6_op == 3) {
		printk(KERN_ERR __FUNCTION__": register update on speculative load, error\n");
		die_if_kernel("unaligned reference on speculative load with register update\n",
			      regs, 30);
	}

	/*
	 * at this point, we know that the base register to update is valid, i.e.,
	 * it's not r0
	 */
	if (type == UPD_IMMEDIATE) {
		unsigned long imm;

		/*
		 * Load +Imm: ldXZ r1=[r3],imm(9)
		 *
		 * form imm9: [13:19] contain the first 7 bits
		 */
		imm = ld->x << 7 | ld->imm;

		/*
		 * sign extend (1+8bits) if m set
		 */
		if (ld->m)
			imm |= SIGN_EXT9;

		/*
		 * ifa == r3 and we know that the NaT bit on r3 was clear so
		 * we can directly use ifa.
		 */
		ifa += imm;

		setreg(ld->r3, ifa, 0, regs);

		DPRINT(("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld->x, ld->m, imm, ifa));

	} else if (ld->m) {
		unsigned long r2;
		int nat_r2;

		/*
		 * Load +Reg Opcode: ldXZ r1=[r3],r2
		 *
		 * Note: that we update r3 even in the case of ldfX.a
		 * (where the load does not happen)
		 *
		 * The way the load algorithm works, we know that r3 does not
		 * have its NaT bit set (would have gotten NaT consumption
		 * before getting the unaligned fault). So we can use ifa
		 * which equals r3 at this point.
		 *
		 * IMPORTANT:
		 * The above statement holds ONLY because we know that we
		 * never reach this code when trying to do a ldX.s.
		 * If we ever made it to here on an ldfX.s, the assumption
		 * about r3's NaT bit would no longer hold.
		 */
		getreg(ld->imm, &r2, &nat_r2, regs);

		ifa += r2;

		/*
		 * propagate NaT r2 -> r3
		 */
		setreg(ld->r3, ifa, nat_r2, regs);

		DPRINT(("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n", ld->imm, r2, ifa, nat_r2));
	}
}
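/*
 * Illustrative sketch, example only: a standalone version of the imm9
 * assembly and sign-extension done in emulate_load_updates() above.  The
 * helper name is hypothetical; it assumes SIGN_EXT9 is the usual
 * "all ones from bit 8 upward" mask used by this file.
 */
#if 0	/* example only, never compiled */
static unsigned long
example_form_imm9(unsigned int x, unsigned int m, unsigned int imm7)
{
	unsigned long imm = ((unsigned long) x << 7) | imm7;	/* bits 0-7 */

	if (m)				/* m carries the sign of imm9 */
		imm |= SIGN_EXT9;	/* extend the sign to 64 bits */

	/* e.g. x=1, m=1, imm7=0x05 gives 0x...ff85, i.e. -123 */
	return imm;
}
#endif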
static int
emulate_load_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
{
	unsigned long val;
	unsigned int len = 1 << ld->x6_sz;

	/*
	 * the macro supposes sequential access (which is the case):
	 * if the first byte is an invalid address we return here. Otherwise
	 * there is a guard page at the top of the user's address space and
	 * the first access would generate a NaT consumption fault and return
	 * with a SIGSEGV, which is what we want.
	 *
	 * Note: the first argument is ignored
	 */
	if (access_ok(VERIFY_READ, (void *)ifa, len) < 0) {
		DPRINT(("verify area failed on %lx\n", ifa));
		return -1;
	}

	/*
	 * r0, as target, doesn't need to be checked because Illegal Instruction
	 * faults have higher priority than unaligned faults.
	 *
	 * r0 cannot be found as the base as it would never generate an
	 * unaligned reference.
	 */

	/*
	 * ldX.a: we don't try to emulate anything but we must
	 * invalidate the ALAT entry.
	 * See comment below for explanation on how we handle ldX.a.
	 */
	if (ld->x6_op != 0x2) {
		/*
		 * we rely on the macros in unaligned.h for now, i.e.,
		 * we let the compiler figure out how to read memory gracefully.
		 *
		 * We need this switch/case because of the way the inline function
		 * works. The code is optimized by the compiler and looks like
		 * a single switch/case.
		 */
		switch (len) {
		case 2:
			val = ia64_get_unaligned((void *)ifa, 2);
			break;
		case 4:
			val = ia64_get_unaligned((void *)ifa, 4);
			break;
		case 8:
			val = ia64_get_unaligned((void *)ifa, 8);
			break;
		default:
			DPRINT(("unknown size: x6=%d\n", ld->x6_sz));
			return -1;
		}

		setreg(ld->r1, val, 0, regs);
	}

	/*
	 * check for updates on any kind of loads
	 */
	if (ld->op == 0x5 || ld->m)
		emulate_load_updates(ld->op == 0x5 ? UPD_IMMEDIATE : UPD_REG, ld, regs, ifa);

	/*
	 * handling of various loads (based on EAS2.4):
	 *
	 * ldX.acq (ordered load):
	 *	- acquire semantics would have been used, so force fence instead.
	 *
	 * ldX.c.clr (check load and clear):
	 *	- if we get to this handler, it's because the entry was not in the ALAT.
	 *	  Therefore the operation reverts to a normal load.
	 *
	 * ldX.c.nc (check load no clear):
	 *	- same as previous one
	 *
	 * ldX.c.clr.acq (ordered check load and clear):
	 *	- same as above for the c.clr part. The load needs to have acquire semantics, so
	 *	  we use the fence semantics, which is stronger and thus ensures correctness.
	 *
	 * ldX.a (advanced load):
	 *	- suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
	 *	  address doesn't match the requested size alignment. This means that we would
	 *	  possibly need more than one load to get the result.
	 *
	 *	  The load part can be handled just like a normal load, however the difficult
	 *	  part is to get the right thing into the ALAT. The critical piece of information
	 *	  is the base address of the load & its size. To do that, a ld.a must be executed;
	 *	  clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
	 *	  if we use the same target register, we will be okay for the check.a instruction.
	 *	  If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
	 *	  which would overlap within [r3,r3+X] (the size of the load was stored in the
	 *	  ALAT). If such an entry is found, the entry is invalidated. But this is not good
	 *	  enough, take the following example:
	 *		r3=3
	 *		ld4.a r1=[r3]
	 *
	 *	  Could be emulated by doing:
	 *		ld1.a r1=[r3],1
	 *		store to temporary;
	 *		ld1.a r1=[r3],1
	 *		store & shift to temporary;
	 *		ld1.a r1=[r3],1
	 *		store & shift to temporary;
	 *		ld1.a r1=[r3]
	 *		store & shift to temporary;
	 *		r1=temporary
	 *
	 *	  So in this case, you would get the right value in r1 but the wrong info in
	 *	  the ALAT. Notice that you could do it in reverse to finish with address 3,
	 *	  but you would still get the size wrong. To get the size right, one needs to
	 *	  execute exactly the same kind of load. You could do it from an aligned
	 *	  temporary location, but you would get the address wrong.
	 *
	 *	  So no matter what, it is not possible to emulate an advanced load
	 *	  correctly. But is that really critical ?
	 *
	 *	  Now one has to look at how ld.a is used: one must either do a ld.c.* or
	 *	  a chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
	 *	  entry found in the ALAT), and that's perfectly ok because:
	 *
	 *		- ld.c.*: if the entry is not present, a normal load is executed
	 *		- chk.a.*: if the entry is not present, execution jumps to recovery code
	 *
	 *	  In either case, the load can potentially be retried in another form.
	 *
	 *	  So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
	 *	  must be invalidated for the register (so that chk.a.* and ld.c.* don't pick up
	 *	  a stale entry later). The register base update MUST also be performed.
	 *
	 *	  Now what is the content of the register and its NaT bit in the case we don't
	 *	  do the load ?  EAS2.4 says (in case an actual load is needed):
	 *
	 *		- r1 = [r3], NaT = 0 if it succeeds
	 *		- r1 = 0,    NaT = 0 if trying to access non-speculative memory
	 *
	 *	  For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
	 *	  retry and thus eventually reload the register, thereby changing the NaT bit and
	 *	  register content.
	 */

	/*
	 * when the load has the .acq completer,
	 * use an ordering fence.
	 */
	if (ld->x6_op == 0x5 || ld->x6_op == 0xa)
		mb();

	/*
	 * invalidate ALAT entry in case of advanced load
	 */
	if (ld->x6_op == 0x2)
		invala_gr(ld->r1);

	return 0;
}
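/*
 * Illustrative sketch, example only: conceptually, the ia64_get_unaligned()
 * calls above boil down to assembling the value with a byte-wise copy so
 * that no wide, aligned load is ever issued.  The helper name is
 * hypothetical and assumes a generic memcpy-based implementation
 * (ia64 Linux is little-endian, so copying into the low end of v yields
 * the zero-extended value).
 */
#if 0	/* example only, never compiled */
static unsigned long
example_get_unaligned(const void *p, unsigned int len)	/* len is 2, 4 or 8 */
{
	unsigned long v = 0;

	memcpy(&v, p, len);	/* fills the low-order len bytes of v */
	return v;
}
#endif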
static int
emulate_store_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
{
	unsigned long r2;
	unsigned int len = 1 << ld->x6_sz;

	/*
	 * the macro supposes sequential access (which is the case):
	 * if the first byte is an invalid address we return here. Otherwise
	 * there is a guard page at the top of the user's address space and
	 * the first access would generate a NaT consumption fault and return
	 * with a SIGSEGV, which is what we want.
	 *
	 * Note: the first argument is ignored
	 */
	if (access_ok(VERIFY_WRITE, (void *)ifa, len) < 0) {
		DPRINT(("verify area failed on %lx\n", ifa));
		return -1;
	}

	/*
	 * if we get to this handler, the NaT bits on both r3 and r2 have already
	 * been checked, so we don't need to do it.
	 *
	 * extract the value to be stored
	 */
	getreg(ld->imm, &r2, 0, regs);

	/*
	 * we rely on the macros in unaligned.h for now, i.e.,
	 * we let the compiler figure out how to write memory gracefully.
	 *
	 * We need this switch/case because of the way the inline function
	 * works. The code is optimized by the compiler and looks like
	 * a single switch/case.
	 */
	DPRINT(("st%d [%lx]=%lx\n", len, ifa, r2));

	switch (len) {
	case 2:
		ia64_put_unaligned(r2, (void *)ifa, 2);
		break;
	case 4:
		ia64_put_unaligned(r2, (void *)ifa, 4);
		break;
	case 8:
		ia64_put_unaligned(r2, (void *)ifa, 8);
		break;
	default:
		DPRINT(("unknown size: x6=%d\n", ld->x6_sz));
		return -1;
	}

	/*
	 * stX [r3]=r2,imm(9)
	 *
	 * NOTE:
	 * ld->r3 can never be r0, because r0 would not generate an
	 * unaligned access.
	 */
	if (ld->op == 0x5) {
		unsigned long imm;

		/*
		 * form imm9: [12:6] contain the first 7 bits
		 */
		imm = ld->x << 7 | ld->r1;

		/*
		 * sign extend (8bits) if m set
		 */
		if (ld->m)
			imm |= SIGN_EXT9;

		/*
		 * ifa == r3 (NaT is necessarily cleared)
		 */
		ifa += imm;

		DPRINT(("imm=%lx r3=%lx\n", imm, ifa));

		setreg(ld->r3, ifa, 0, regs);
	}

	/*
	 * we don't have alat_invalidate_multiple() so we need
	 * to do the complete flush :-<<
	 */
	ia64_invala();

	/*
	 * stX.rel: use fence instead of release
	 */
	if (ld->x6_op == 0xd)
		mb();

	return 0;
}
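/*
 * Illustrative sketch, example only: the mirror image of the read case,
 * i.e. what the ia64_put_unaligned() calls in emulate_store_int()
 * conceptually do.  The helper name is hypothetical and assumes a generic
 * memcpy-based implementation.
 */
#if 0	/* example only, never compiled */
static void
example_put_unaligned(unsigned long val, void *p, unsigned int len)	/* len is 2, 4 or 8 */
{
	/* store the low-order len bytes of val without a wide, aligned store */
	memcpy(p, &val, len);
}
#endif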
/*
 * floating point operations sizes in bytes
 */
static const unsigned short float_fsz[4] = {
	16,	/* extended precision (e) */
	8,	/* integer (8)            */
	4,	/* single precision (s)   */
	8	/* double precision (d)   */
};

/*
 * mem2float_*(): load a value of the given format from memory into f6 and
 * write f6 back out in register (spill) format.
 * float2mem_*(): fill f6 from a spilled image and store it to memory in the
 * requested format.
 */
static inline void
mem2float_extended(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
mem2float_integer(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
mem2float_single(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
mem2float_double(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_extended(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_integer(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_single(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_double(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static int
emulate_load_floatpair(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
{
	struct ia64_fpreg fpr_init[2];
	struct ia64_fpreg fpr_final[2];
	unsigned long len = float_fsz[ld->x6_sz];

	if (access_ok(VERIFY_READ, (void *)ifa, len<<1) < 0) {
		DPRINT(("verify area failed on %lx\n", ifa));
		return -1;
	}

	/*
	 * fr0 & fr1 don't need to be checked because Illegal Instruction
	 * faults have higher priority than unaligned faults.
	 *
	 * r0 cannot be found as the base as it would never generate an
	 * unaligned reference.
	 */

	/*
	 * make sure we get clean buffers
	 */
	memset(&fpr_init, 0, sizeof(fpr_init));
	memset(&fpr_final, 0, sizeof(fpr_final));

	/*
	 * ldfpX.a: we don't try to emulate anything but we must
	 * invalidate the ALAT entry and execute updates, if any.
	 */
	if (ld->x6_op != 0x2) {
		/*
		 * does the unaligned access
		 */
		memcpy(&fpr_init[0], (void *)ifa, len);
		memcpy(&fpr_init[1], (void *)(ifa+len), len);

		DPRINT(("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld->r1, ld->imm, ld->x6_sz));
#ifdef DEBUG_UNALIGNED_TRAP
		{
			int i;
			char *c = (char *)&fpr_init;

			printk("fpr_init= ");
			for (i = 0; i < len<<1; i++) {
				printk("%02x ", c[i]&0xff);
			}
			printk("\n");
		}
#endif
		/*
		 * XXX fixme
		 * Could optimize inlines by using ldfpX & 2 spills
		 */
		switch (ld->x6_sz) {
		case 0:
			mem2float_extended(&fpr_init[0], &fpr_final[0]);