unaligned.c
 * To update f32-f127, there are three choices:
 *
 *      (1) save f32-f127 to thread.fph and update the values there
 *      (2) use a gigantic switch statement to directly access the registers
 *      (3) generate code on the fly to update the desired register
 *
 * For now, we are using approach (1).
 */
        if (regnum >= IA64_FIRST_ROTATING_FR) {
                ia64_sync_fph(current);
                current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
        } else {
                /*
                 * pt_regs or switch_stack ?
                 */
                if (FR_IN_SW(regnum)) {
                        addr = (unsigned long)sw;
                } else {
                        addr = (unsigned long)regs;
                }

                DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));

                addr += FR_OFFS(regnum);
                *(struct ia64_fpreg *)addr = *fpval;

                /*
                 * mark the low partition as being used now
                 *
                 * It is highly unlikely that this bit is not already set, but
                 * let's do it for safety.
                 */
                regs->cr_ipsr |= IA64_PSR_MFL;
        }
}

/*
 * These two inline functions generate the spilled versions of the constant floating point
 * registers which can be used with stfX.
 */
static inline void
float_spill_f0 (struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory");
}

static inline void
float_spill_f1 (struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory");
}

static void
getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
{
        struct switch_stack *sw = (struct switch_stack *) regs - 1;
        unsigned long addr;

        /*
         * From EAS-2.5: FPDisableFault has higher priority than
         * Unaligned Fault. Thus, when we get here, we know the partition is
         * enabled.
         *
         * When regnum > 31, the register is still live and we need to force a save
         * to current->thread.fph to get access to it. See discussion in setfpreg()
         * for reasons and other ways of doing this.
         */
        if (regnum >= IA64_FIRST_ROTATING_FR) {
                ia64_flush_fph(current);
                *fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
        } else {
                /*
                 * f0 = 0.0, f1 = 1.0. These registers are constant and are thus
                 * not saved; we must generate their spilled form on the fly.
                 */
                switch(regnum) {
                case 0:
                        float_spill_f0(fpval);
                        break;
                case 1:
                        float_spill_f1(fpval);
                        break;
                default:
                        /*
                         * pt_regs or switch_stack ?
                         */
                        addr = FR_IN_SW(regnum) ? (unsigned long)sw : (unsigned long)regs;

                        DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
                               FR_IN_SW(regnum), addr, FR_OFFS(regnum));

                        addr += FR_OFFS(regnum);
                        *fpval = *(struct ia64_fpreg *)addr;
                }
        }
}

static void
getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
{
        struct switch_stack *sw = (struct switch_stack *) regs - 1;
        unsigned long addr, *unat;

        if (regnum >= IA64_FIRST_STACKED_GR) {
                get_rse_reg(regs, regnum, val, nat);
                return;
        }

        /*
         * take care of r0 (read-only, always evaluates to 0)
         */
        if (regnum == 0) {
                *val = 0;
                if (nat)
                        *nat = 0;
                return;
        }

        /*
         * Now look at registers in the [0-31] range and init the correct UNAT
         */
        if (GR_IN_SW(regnum)) {
                addr = (unsigned long)sw;
                unat = &sw->ar_unat;
        } else {
                addr = (unsigned long)regs;
                unat = &sw->caller_unat;
        }

        DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));

        addr += GR_OFFS(regnum);

        *val = *(unsigned long *)addr;

        /*
         * do it only when requested
         */
        if (nat)
                *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
}
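/*
 * Illustrative sketch (hypothetical helper, not used by the code above or
 * below): the NaT lookup at the end of getreg() relies on the IA-64 rule
 * that st8.spill records the NaT bit of a spilled general register in the
 * ar.unat bit selected by bits 8:3 of the spill address. The same indexing,
 * written out on its own:
 */
static inline unsigned long
unat_bit_for_spill_addr (unsigned long unat, unsigned long spill_addr)
{
        /* the low six bits of (spill_addr / 8) select one of the 64 collection bits */
        return (unat >> ((spill_addr >> 3) & 0x3f)) & 0x1UL;
}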
static void
emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
{
        /*
         * IMPORTANT:
         * Given the way we handle unaligned speculative loads, we should
         * not get to this point in the code but we keep this sanity check,
         * just in case.
         */
        if (ld.x6_op == 1 || ld.x6_op == 3) {
                printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
                die_if_kernel("unaligned reference on speculative load with register update\n",
                              regs, 30);
        }

        /*
         * at this point, we know that the base register to update is valid, i.e.,
         * it's not r0
         */
        if (type == UPD_IMMEDIATE) {
                unsigned long imm;

                /*
                 * Load +Imm: ldXZ r1=[r3],imm(9)
                 *
                 * form imm9: [13:19] contain the first 7 bits
                 */
                imm = ld.x << 7 | ld.imm;

                /*
                 * sign extend (1+8 bits) if m set
                 */
                if (ld.m)
                        imm |= SIGN_EXT9;

                /*
                 * ifa == r3 and we know that the NaT bit on r3 was clear so
                 * we can directly use ifa.
                 */
                ifa += imm;

                setreg(ld.r3, ifa, 0, regs);

                DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);

        } else if (ld.m) {
                unsigned long r2;
                int nat_r2;

                /*
                 * Load +Reg Opcode: ldXZ r1=[r3],r2
                 *
                 * Note: that we update r3 even in the case of ldfX.a
                 * (where the load does not happen)
                 *
                 * The way the load algorithm works, we know that r3 does not
                 * have its NaT bit set (we would have gotten a NaT consumption
                 * fault before getting the unaligned fault). So we can use ifa,
                 * which equals r3 at this point.
                 *
                 * IMPORTANT:
                 * The above statement holds ONLY because we know that we
                 * never reach this code when trying to do a ldX.s.
                 * If we ever make it to here on an ldfX.s then this
                 * assumption would no longer hold.
                 */
                getreg(ld.imm, &r2, &nat_r2, regs);

                ifa += r2;

                /*
                 * propagate Nat r2 -> r3
                 */
                setreg(ld.r3, ifa, nat_r2, regs);

                DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n", ld.imm, r2, ifa, nat_r2);
        }
}
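/*
 * Illustrative sketch (hypothetical helper, not referenced elsewhere): the
 * imm9 reconstruction performed by emulate_load_updates(), written out on
 * its own. For example, a post-increment of -1 encodes ld.m=1, ld.x=1,
 * ld.imm=0x7f, so (1 << 7 | 0x7f) == 0xff and the sign extension yields -1;
 * ld.m=1, ld.x=0, ld.imm=0 yields -256, the most negative 9-bit displacement.
 */
static inline long
decode_ld_imm9 (load_store_t ld)
{
        unsigned long imm = ld.x << 7 | ld.imm; /* x and imm7 form the low 8 bits */

        if (ld.m)                               /* m is the sign bit of imm9 */
                imm |= SIGN_EXT9;
        return (long) imm;
}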
static int
emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
        unsigned int len = 1 << ld.x6_sz;

        /*
         * r0, as target, doesn't need to be checked because Illegal Instruction
         * faults have higher priority than unaligned faults.
         *
         * r0 cannot be found as the base as it would never generate an
         * unaligned reference.
         */

        /*
         * ldX.a: we don't try to emulate anything but we must invalidate the ALAT entry.
         * See the comment below for an explanation of how we handle ldX.a.
         */
        if (ld.x6_op != 0x2) {
                unsigned long val = 0;

                if (len != 2 && len != 4 && len != 8) {
                        DPRINT("unknown size: x6=%d\n", ld.x6_sz);
                        return -1;
                }
                /* this assumes little-endian byte-order: */
                if (copy_from_user(&val, (void *) ifa, len))
                        return -1;
                setreg(ld.r1, val, 0, regs);
        }

        /*
         * check for updates on any kind of loads
         */
        if (ld.op == 0x5 || ld.m)
                emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE : UPD_REG, ld, regs, ifa);

        /*
         * handling of various loads (based on EAS2.4):
         *
         * ldX.acq (ordered load):
         *      - acquire semantics would have been used, so force a fence instead.
         *
         * ldX.c.clr (check load and clear):
         *      - if we get to this handler, it's because the entry was not in the ALAT.
         *        Therefore the operation reverts to a normal load.
         *
         * ldX.c.nc (check load no clear):
         *      - same as the previous one
         *
         * ldX.c.clr.acq (ordered check load and clear):
         *      - same as above for the c.clr part. The load needs to have acquire semantics,
         *        so we use the fence semantics, which are stronger and thus ensure correctness.
         *
         * ldX.a (advanced load):
         *      - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
         *        address doesn't match the requested size alignment. This means that we would
         *        possibly need more than one load to get the result.
         *
         *        The load part can be handled just like a normal load; however, the difficult
         *        part is to get the right thing into the ALAT. The critical piece of information
         *        in the ALAT is the base address of the load and its size. Clearly any address
         *        can be pushed into the table by executing ld1.a r1=[r3].
         *        Now, if we use the same target register, we will be okay for the chk.a instruction.
         *        If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
         *        which would overlap within [r3,r3+X] (the size of the load was stored in the
         *        ALAT). If such an entry is found, the entry is invalidated. But this is not good
         *        enough. Take the following example:
         *              r3=3
         *              ld4.a r1=[r3]
         *
         *        Could be emulated by doing:
         *              ld1.a r1=[r3],1
         *              store to temporary;
         *              ld1.a r1=[r3],1
         *              store & shift to temporary;
         *              ld1.a r1=[r3],1
         *              store & shift to temporary;
         *              ld1.a r1=[r3]
         *              store & shift to temporary;
         *              r1=temporary
         *
         *        So in this case, you would get the right value in r1 but the wrong info in
         *        the ALAT. Notice that you could do it in reverse to finish with address 3,
         *        but you would still get the size wrong. To get the size right, one needs to
         *        execute exactly the same kind of load. You could do it from an aligned
         *        temporary location, but then you would get the address wrong.
         *
         *        So no matter what, it is not possible to emulate an advanced load
         *        correctly. But is that really critical?
         *
         *        Now one has to look at how ld.a is used: one must either do a ld.c.* or
         *        chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
         *        entry found in the ALAT), and that's perfectly ok because:
         *
         *              - ld.c.*: if the entry is not present, a normal load is executed
         *              - chk.a.*: if the entry is not present, execution jumps to recovery code
         *
         *        In either case, the load can potentially be retried in another form.
         *
         *        So it's okay NOT to do any actual load on an unaligned ld.a. However, the ALAT
         *        must be invalidated for the register (so that chk.a.* and ld.c.* don't pick up
         *        a stale entry later). The register base update MUST also be performed.
         *
         *        Now what should the content of the register and its NaT bit be in the case we
         *        don't do the load? EAS2.4 says (in case an actual load is needed):
         *
         *              - r1 = [r3], Nat = 0 if it succeeds
         *              - r1 = 0,    Nat = 0 if trying to access non-speculative memory
         *
         *        For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
         *        retry and thus eventually reload the register, thereby changing the NaT bit and
         *        register content.
         */

        /*
         * when the load has the .acq completer then
         * use an ordering fence.
         */
        if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
                mb();

        /*
         * invalidate ALAT entry in case of advanced load
         */
        if (ld.x6_op == 0x2)
                invala_gr(ld.r1);

        return 0;
}
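/*
 * Illustrative sketch (hypothetical, user-level helper): why copying 'len'
 * bytes into a zeroed 64-bit word, as emulate_load_int() does via
 * copy_from_user(), yields the zero-extended value on a little-endian
 * machine. Assumes memcpy() is available (e.g. via <linux/string.h>).
 */
static inline unsigned long
load_le_zero_extended (const unsigned char *src, unsigned int len)
{
        unsigned long val = 0;

        /* low-order bytes land in the low-order bits; the remaining bytes stay zero */
        memcpy(&val, src, len);
        return val;
}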
static int
emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
        unsigned long r2;
        unsigned int len = 1 << ld.x6_sz;

        /*
         * if we get to this handler, the NaT bits on both r3 and r2 have already
         * been checked, so we don't need to do it again.
         *
         * extract the value to be stored
         */
        getreg(ld.imm, &r2, 0, regs);

        /*
         * we rely on the macros in unaligned.h for now, i.e.,
         * we let the compiler figure out how to read memory gracefully.
         *
         * We need this switch/case because of the way the inline function
         * works. The code is optimized by the compiler and looks like
         * a single switch/case.
         */
        DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);

        if (len != 2 && len != 4 && len != 8) {
                DPRINT("unknown size: x6=%d\n", ld.x6_sz);
                return -1;
        }

        /* this assumes little-endian byte-order: */
        if (copy_to_user((void *) ifa, &r2, len))
                return -1;

        /*
         * stX [r3]=r2,imm(9)
         *
         * NOTE:
         * ld.r3 can never be r0, because r0 would not generate an
         * unaligned access.
         */
        if (ld.op == 0x5) {
                unsigned long imm;

                /*
                 * form imm9: [12:6] contain the first 7 bits
                 */
                imm = ld.x << 7 | ld.r1;

                /*
                 * sign extend (8 bits) if m set
                 */
                if (ld.m)
                        imm |= SIGN_EXT9;

                /*
                 * ifa == r3 (NaT is necessarily cleared)
                 */
                ifa += imm;

                DPRINT("imm=%lx r3=%lx\n", imm, ifa);

                setreg(ld.r3, ifa, 0, regs);
        }

        /*
         * we don't have alat_invalidate_multiple() so we need
         * to do the complete flush :-<<
         */
        ia64_invala();

        /*
         * stX.rel: use fence instead of release
         */
        if (ld.x6_op == 0xd)
                mb();

        return 0;
}

/*
 * floating point operation sizes in bytes
 */
static const unsigned char float_fsz[4] = {
        10,     /* extended precision (e) */
        8,      /* integer (8) */
        4,      /* single precision (s) */
        8       /* double precision (d) */
};

static inline void
mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
        __asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6"
                              :: "r"(init), "r"(final) : "f6", "memory");
}

static int
emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)