/* unaligned.c */
	} else {
		/*
		 * f0 = 0.0, f1 = 1.0. Those registers are constant and are thus
		 * not saved, we must generate their spilled form on the fly
		 */
		switch (regnum) {
		case 0:
			float_spill_f0(fpval);
			break;
		case 1:
			float_spill_f1(fpval);
			break;
		default:
			/*
			 * pt_regs or switch_stack ?
			 */
			addr = FR_IN_SW(regnum) ? (unsigned long)sw : (unsigned long)regs;

			DPRINT(("is_sw=%d tmp_base=%lx offset=0x%x\n",
				FR_IN_SW(regnum), addr, FR_OFFS(regnum)));

			addr += FR_OFFS(regnum);

			*fpval = *(struct ia64_fpreg *)addr;
		}
	}
}

static void
getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
{
	struct switch_stack *sw = (struct switch_stack *)regs - 1;
	unsigned long addr, *unat;

	if (regnum >= IA64_FIRST_STACKED_GR) {
		get_rse_reg(regs, regnum, val, nat);
		return;
	}

	/*
	 * take care of r0 (read-only, always evaluates to 0)
	 */
	if (regnum == 0) {
		*val = 0;
		if (nat)
			*nat = 0;
		return;
	}

	/*
	 * Now look at registers in [0-31] range and init correct UNAT
	 */
	if (GR_IN_SW(regnum)) {
		addr = (unsigned long)sw;
		unat = &sw->ar_unat;
	} else {
		addr = (unsigned long)regs;
		unat = &sw->caller_unat;
	}

	DPRINT(("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum)));

	addr += GR_OFFS(regnum);

	*val = *(unsigned long *)addr;

	/*
	 * do it only when requested
	 */
	if (nat)
		*nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
}

static void
emulate_load_updates(update_t type, load_store_t *ld, struct pt_regs *regs, unsigned long ifa)
{
	/*
	 * IMPORTANT:
	 * Given the way we handle unaligned speculative loads, we should
	 * not get to this point in the code but we keep this sanity check,
	 * just in case.
	 */
	if (ld->x6_op == 1 || ld->x6_op == 3) {
		printk(KERN_ERR __FUNCTION__": register update on speculative load, error\n");
		die_if_kernel("unaligned reference on speculative load with register update\n",
			      regs, 30);
	}

	/*
	 * at this point, we know that the base register to update is valid, i.e.,
	 * it's not r0
	 */
	if (type == UPD_IMMEDIATE) {
		unsigned long imm;

		/*
		 * Load +Imm: ldXZ r1=[r3],imm(9)
		 *
		 * form imm9: [13:19] contain the first 7 bits
		 */
		imm = ld->x << 7 | ld->imm;

		/*
		 * sign extend (1+8bits) if m set
		 */
		if (ld->m)
			imm |= SIGN_EXT9;

		/*
		 * ifa == r3 and we know that the NaT bit on r3 was clear so
		 * we can directly use ifa.
		 */
		ifa += imm;

		setreg(ld->r3, ifa, 0, regs);

		DPRINT(("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld->x, ld->m, imm, ifa));

	} else if (ld->m) {
		unsigned long r2;
		int nat_r2;

		/*
		 * Load +Reg Opcode: ldXZ r1=[r3],r2
		 *
		 * Note: that we update r3 even in the case of ldfX.a
		 * (where the load does not happen)
		 *
		 * The way the load algorithm works, we know that r3 does not
		 * have its NaT bit set (would have gotten NaT consumption
		 * before getting the unaligned fault). So we can use ifa
		 * which equals r3 at this point.
		 *
		 * IMPORTANT:
		 * The above statement holds ONLY because we know that we
		 * never reach this code when trying to do a ldX.s.
		 * If we ever made it to here on an ldfX.s, the assumption
		 * about r3's NaT bit would no longer hold.
		 */
		getreg(ld->imm, &r2, &nat_r2, regs);

		ifa += r2;

		/*
		 * propagate NaT r2 -> r3
		 */
		setreg(ld->r3, ifa, nat_r2, regs);

		DPRINT(("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n", ld->imm, r2, ifa, nat_r2));
	}
}
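/*
 * Illustrative sketch, example only: a standalone version of the imm9
 * assembly and sign-extension done in emulate_load_updates() above.  The
 * helper name is hypothetical; it assumes SIGN_EXT9 is the usual
 * "all ones from bit 8 upward" mask used by this file.
 */
#if 0	/* example only, never compiled */
static unsigned long
example_form_imm9(unsigned int x, unsigned int m, unsigned int imm7)
{
	unsigned long imm = ((unsigned long) x << 7) | imm7;	/* bits 0-7 */

	if (m)				/* m carries the sign of imm9 */
		imm |= SIGN_EXT9;	/* extend the sign to 64 bits */

	/* e.g. x=1, m=1, imm7=0x05 gives 0x...ff85, i.e. -123 */
	return imm;
}
#endif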
static int
emulate_load_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
{
	unsigned long val;
	unsigned int len = 1 << ld->x6_sz;

	/*
	 * the macro supposes sequential access (which is the case):
	 * if the first byte is an invalid address we return here. Otherwise
	 * there is a guard page at the top of the user's address space and
	 * the first access would generate a NaT consumption fault and return
	 * with a SIGSEGV, which is what we want.
	 *
	 * Note: the first argument is ignored
	 */
	if (access_ok(VERIFY_READ, (void *)ifa, len) < 0) {
		DPRINT(("verify area failed on %lx\n", ifa));
		return -1;
	}

	/*
	 * r0, as target, doesn't need to be checked because Illegal Instruction
	 * faults have higher priority than unaligned faults.
	 *
	 * r0 cannot be found as the base as it would never generate an
	 * unaligned reference.
	 */

	/*
	 * ldX.a: we don't try to emulate anything but we must
	 * invalidate the ALAT entry.
	 * See comment below for explanation on how we handle ldX.a.
	 */
	if (ld->x6_op != 0x2) {
		/*
		 * we rely on the macros in unaligned.h for now, i.e.,
		 * we let the compiler figure out how to read memory gracefully.
		 *
		 * We need this switch/case because of the way the inline function
		 * works. The code is optimized by the compiler and looks like
		 * a single switch/case.
		 */
		switch (len) {
		case 2:
			val = ia64_get_unaligned((void *)ifa, 2);
			break;
		case 4:
			val = ia64_get_unaligned((void *)ifa, 4);
			break;
		case 8:
			val = ia64_get_unaligned((void *)ifa, 8);
			break;
		default:
			DPRINT(("unknown size: x6=%d\n", ld->x6_sz));
			return -1;
		}

		setreg(ld->r1, val, 0, regs);
	}

	/*
	 * check for updates on any kind of loads
	 */
	if (ld->op == 0x5 || ld->m)
		emulate_load_updates(ld->op == 0x5 ? UPD_IMMEDIATE : UPD_REG, ld, regs, ifa);

	/*
	 * handling of various loads (based on EAS2.4):
	 *
	 * ldX.acq (ordered load):
	 *	- acquire semantics would have been used, so force fence instead.
	 *
	 * ldX.c.clr (check load and clear):
	 *	- if we get to this handler, it's because the entry was not in the ALAT.
	 *	  Therefore the operation reverts to a normal load.
	 *
	 * ldX.c.nc (check load no clear):
	 *	- same as previous one
	 *
	 * ldX.c.clr.acq (ordered check load and clear):
	 *	- same as above for the c.clr part. The load needs to have acquire semantics, so
	 *	  we use the fence semantics, which is stronger and thus ensures correctness.
	 *
	 * ldX.a (advanced load):
	 *	- suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
	 *	  address doesn't match the requested size alignment. This means that we would
	 *	  possibly need more than one load to get the result.
	 *
	 *	  The load part can be handled just like a normal load, however the difficult
	 *	  part is to get the right thing into the ALAT. The critical piece of information
	 *	  is the base address of the load & its size. To do that, a ld.a must be executed;
	 *	  clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
	 *	  if we use the same target register, we will be okay for the check.a instruction.
	 *	  If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
	 *	  which would overlap within [r3,r3+X] (the size of the load was stored in the
	 *	  ALAT). If such an entry is found, the entry is invalidated. But this is not good
	 *	  enough, take the following example:
	 *		r3=3
	 *		ld4.a r1=[r3]
	 *
	 *	  Could be emulated by doing:
	 *		ld1.a r1=[r3],1
	 *		store to temporary;
	 *		ld1.a r1=[r3],1
	 *		store & shift to temporary;
	 *		ld1.a r1=[r3],1
	 *		store & shift to temporary;
	 *		ld1.a r1=[r3]
	 *		store & shift to temporary;
	 *		r1=temporary
	 *
	 *	  So in this case, you would get the right value in r1 but the wrong info in
	 *	  the ALAT. Notice that you could do it in reverse to finish with address 3,
	 *	  but you would still get the size wrong. To get the size right, one needs to
	 *	  execute exactly the same kind of load. You could do it from an aligned
	 *	  temporary location, but you would get the address wrong.
	 *
	 *	  So no matter what, it is not possible to emulate an advanced load
	 *	  correctly. But is that really critical ?
	 *
	 *	  Now one has to look at how ld.a is used: one must either do a ld.c.* or
	 *	  a chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
	 *	  entry found in the ALAT), and that's perfectly ok because:
	 *
	 *		- ld.c.*: if the entry is not present, a normal load is executed
	 *		- chk.a.*: if the entry is not present, execution jumps to recovery code
	 *
	 *	  In either case, the load can potentially be retried in another form.
	 *
	 *	  So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
	 *	  must be invalidated for the register (so that chk.a.* and ld.c.* don't pick up
	 *	  a stale entry later). The register base update MUST also be performed.
	 *
	 *	  Now what is the content of the register and its NaT bit in the case we don't
	 *	  do the load ?  EAS2.4 says (in case an actual load is needed):
	 *
	 *		- r1 = [r3], NaT = 0 if it succeeds
	 *		- r1 = 0,    NaT = 0 if trying to access non-speculative memory
	 *
	 *	  For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
	 *	  retry and thus eventually reload the register, thereby changing the NaT bit and
	 *	  register content.
	 */

	/*
	 * when the load has the .acq completer,
	 * use an ordering fence.
	 */
	if (ld->x6_op == 0x5 || ld->x6_op == 0xa)
		mb();

	/*
	 * invalidate ALAT entry in case of advanced load
	 */
	if (ld->x6_op == 0x2)
		invala_gr(ld->r1);

	return 0;
}
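/*
 * Illustrative sketch, example only: conceptually, the ia64_get_unaligned()
 * calls above boil down to assembling the value with a byte-wise copy so
 * that no wide, aligned load is ever issued.  The helper name is
 * hypothetical and assumes a generic memcpy-based implementation
 * (ia64 Linux is little-endian, so copying into the low end of v yields
 * the zero-extended value).
 */
#if 0	/* example only, never compiled */
static unsigned long
example_get_unaligned(const void *p, unsigned int len)	/* len is 2, 4 or 8 */
{
	unsigned long v = 0;

	memcpy(&v, p, len);	/* fills the low-order len bytes of v */
	return v;
}
#endif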
static int
emulate_store_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
{
	unsigned long r2;
	unsigned int len = 1 << ld->x6_sz;

	/*
	 * the macro supposes sequential access (which is the case):
	 * if the first byte is an invalid address we return here. Otherwise
	 * there is a guard page at the top of the user's address space and
	 * the first access would generate a NaT consumption fault and return
	 * with a SIGSEGV, which is what we want.
	 *
	 * Note: the first argument is ignored
	 */
	if (access_ok(VERIFY_WRITE, (void *)ifa, len) < 0) {
		DPRINT(("verify area failed on %lx\n", ifa));
		return -1;
	}

	/*
	 * if we get to this handler, the NaT bits on both r3 and r2 have already
	 * been checked, so we don't need to do it.
	 *
	 * extract the value to be stored
	 */
	getreg(ld->imm, &r2, 0, regs);

	/*
	 * we rely on the macros in unaligned.h for now, i.e.,
	 * we let the compiler figure out how to write memory gracefully.
	 *
	 * We need this switch/case because of the way the inline function
	 * works. The code is optimized by the compiler and looks like
	 * a single switch/case.
	 */
	DPRINT(("st%d [%lx]=%lx\n", len, ifa, r2));

	switch (len) {
	case 2:
		ia64_put_unaligned(r2, (void *)ifa, 2);
		break;
	case 4:
		ia64_put_unaligned(r2, (void *)ifa, 4);
		break;
	case 8:
		ia64_put_unaligned(r2, (void *)ifa, 8);
		break;
	default:
		DPRINT(("unknown size: x6=%d\n", ld->x6_sz));
		return -1;
	}

	/*
	 * stX [r3]=r2,imm(9)
	 *
	 * NOTE:
	 * ld->r3 can never be r0, because r0 would not generate an
	 * unaligned access.
	 */
	if (ld->op == 0x5) {
		unsigned long imm;

		/*
		 * form imm9: [12:6] contain the first 7 bits
		 */
		imm = ld->x << 7 | ld->r1;

		/*
		 * sign extend (8bits) if m set
		 */
		if (ld->m)
			imm |= SIGN_EXT9;

		/*
		 * ifa == r3 (NaT is necessarily cleared)
		 */
		ifa += imm;

		DPRINT(("imm=%lx r3=%lx\n", imm, ifa));

		setreg(ld->r3, ifa, 0, regs);
	}

	/*
	 * we don't have alat_invalidate_multiple() so we need
	 * to do the complete flush :-<<
	 */
	ia64_invala();

	/*
	 * stX.rel: use fence instead of release
	 */
	if (ld->x6_op == 0xd)
		mb();

	return 0;
}
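/*
 * Illustrative sketch, example only: the mirror image of the read case,
 * i.e. what the ia64_put_unaligned() calls in emulate_store_int()
 * conceptually do.  The helper name is hypothetical and assumes a generic
 * memcpy-based implementation.
 */
#if 0	/* example only, never compiled */
static void
example_put_unaligned(unsigned long val, void *p, unsigned int len)	/* len is 2, 4 or 8 */
{
	/* store the low-order len bytes of val without a wide, aligned store */
	memcpy(p, &val, len);
}
#endif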
/*
 * floating point operations sizes in bytes
 */
static const unsigned short float_fsz[4] = {
	16,	/* extended precision (e) */
	8,	/* integer (8)            */
	4,	/* single precision (s)   */
	8	/* double precision (d)   */
};

/*
 * mem2float_*(): load a value of the given format from memory into f6 and
 * write f6 back out in register (spill) format.
 * float2mem_*(): fill f6 from a spilled image and store it to memory in the
 * requested format.
 */
static inline void
mem2float_extended(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
mem2float_integer(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
mem2float_single(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
mem2float_double(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_extended(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_integer(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_single(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_double(struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static int
emulate_load_floatpair(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
{
	struct ia64_fpreg fpr_init[2];
	struct ia64_fpreg fpr_final[2];
	unsigned long len = float_fsz[ld->x6_sz];

	if (access_ok(VERIFY_READ, (void *)ifa, len<<1) < 0) {
		DPRINT(("verify area failed on %lx\n", ifa));
		return -1;
	}

	/*
	 * fr0 & fr1 don't need to be checked because Illegal Instruction
	 * faults have higher priority than unaligned faults.
	 *
	 * r0 cannot be found as the base as it would never generate an
	 * unaligned reference.
	 */

	/*
	 * make sure we get clean buffers
	 */
	memset(&fpr_init, 0, sizeof(fpr_init));
	memset(&fpr_final, 0, sizeof(fpr_final));

	/*
	 * ldfpX.a: we don't try to emulate anything but we must
	 * invalidate the ALAT entry and execute updates, if any.
	 */
	if (ld->x6_op != 0x2) {
		/*
		 * does the unaligned access
		 */
		memcpy(&fpr_init[0], (void *)ifa, len);
		memcpy(&fpr_init[1], (void *)(ifa+len), len);

		DPRINT(("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld->r1, ld->imm, ld->x6_sz));
#ifdef DEBUG_UNALIGNED_TRAP
		{
			int i;
			char *c = (char *)&fpr_init;

			printk("fpr_init= ");
			for (i = 0; i < len<<1; i++) {
				printk("%02x ", c[i]&0xff);
			}
			printk("\n");
		}
#endif
		/*
		 * XXX fixme
		 * Could optimize inlines by using ldfpX & 2 spills
		 */
		switch (ld->x6_sz) {
		case 0:
			mem2float_extended(&fpr_init[0], &fpr_final[0]);