
📄 unaligned.c

📖 Page 1 of 3
	 * To update f32-f127, there are three choices:
	 *
	 *	(1) save f32-f127 to thread.fph and update the values there
	 *	(2) use a gigantic switch statement to directly access the registers
	 *	(3) generate code on the fly to update the desired register
	 *
	 * For now, we are using approach (1).
	 */
	if (regnum >= IA64_FIRST_ROTATING_FR) {
		ia64_sync_fph(current);
		current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
	} else {
		/*
		 * pt_regs or switch_stack ?
		 */
		if (FR_IN_SW(regnum)) {
			addr = (unsigned long)sw;
		} else {
			addr = (unsigned long)regs;
		}

		DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));

		addr += FR_OFFS(regnum);
		*(struct ia64_fpreg *)addr = *fpval;

		/*
		 * mark the low partition as being used now
		 *
		 * It is highly unlikely that this bit is not already set, but
		 * let's do it for safety.
		 */
		regs->cr_ipsr |= IA64_PSR_MFL;
	}
}

/*
 * Those 2 inline functions generate the spilled versions of the constant floating point
 * registers which can be used with stfX
 */
static inline void
float_spill_f0 (struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory");
}

static inline void
float_spill_f1 (struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory");
}

static void
getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
{
	struct switch_stack *sw = (struct switch_stack *) regs - 1;
	unsigned long addr;

	/*
	 * From EAS-2.5: FPDisableFault has higher priority than
	 * Unaligned Fault. Thus, when we get here, we know the partition is
	 * enabled.
	 *
	 * When regnum > 31, the register is still live and we need to force a save
	 * to current->thread.fph to get access to it.  See discussion in setfpreg()
	 * for reasons and other ways of doing this.
	 */
	if (regnum >= IA64_FIRST_ROTATING_FR) {
		ia64_flush_fph(current);
		*fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
	} else {
		/*
		 * f0 = 0.0, f1 = 1.0. Those registers are constant and are thus
		 * not saved, we must generate their spilled form on the fly
		 */
		switch(regnum) {
		case 0:
			float_spill_f0(fpval);
			break;
		case 1:
			float_spill_f1(fpval);
			break;
		default:
			/*
			 * pt_regs or switch_stack ?
			 */
			addr =  FR_IN_SW(regnum) ? (unsigned long)sw
						 : (unsigned long)regs;

			DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
			       FR_IN_SW(regnum), addr, FR_OFFS(regnum));

			addr  += FR_OFFS(regnum);
			*fpval = *(struct ia64_fpreg *)addr;
		}
	}
}

static void
getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
{
	struct switch_stack *sw = (struct switch_stack *) regs - 1;
	unsigned long addr, *unat;

	if (regnum >= IA64_FIRST_STACKED_GR) {
		get_rse_reg(regs, regnum, val, nat);
		return;
	}

	/*
	 * take care of r0 (read-only, always evaluates to 0)
	 */
	if (regnum == 0) {
		*val = 0;
		if (nat)
			*nat = 0;
		return;
	}

	/*
	 * Now look at registers in [0-31] range and init correct UNAT
	 */
	if (GR_IN_SW(regnum)) {
		addr = (unsigned long)sw;
		unat = &sw->ar_unat;
	} else {
		addr = (unsigned long)regs;
		unat = &sw->caller_unat;
	}
	DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));

	addr += GR_OFFS(regnum);

	*val  = *(unsigned long *)addr;

	/*
	 * do it only when requested
	 */
	if (nat)
		*nat  = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
}

static void
emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
{
	/*
	 * IMPORTANT:
	 * Given the way we handle unaligned speculative loads, we should
	 * not get to this point in the code but we keep this sanity check,
	 * just in case.
	 */
	if (ld.x6_op == 1 || ld.x6_op == 3) {
		printk(KERN_ERR __FUNCTION__": register update on speculative load, error\n");
		die_if_kernel("unaligned reference on speculative load with register update\n",
			      regs, 30);
	}

	/*
	 * at this point, we know that the base register to update is valid, i.e.
	 * it's not r0
	 */
	if (type == UPD_IMMEDIATE) {
		unsigned long imm;

		/*
		 * Load +Imm: ldXZ r1=[r3],imm(9)
		 *
		 * form imm9: [13:19] contain the first 7 bits
		 */
		imm = ld.x << 7 | ld.imm;

		/*
		 * sign extend (1+8 bits) if m set
		 */
		if (ld.m) imm |= SIGN_EXT9;

		/*
		 * ifa == r3 and we know that the NaT bit on r3 was clear so
		 * we can directly use ifa.
		 */
		ifa += imm;

		setreg(ld.r3, ifa, 0, regs);

		DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);

	} else if (ld.m) {
		unsigned long r2;
		int nat_r2;

		/*
		 * Load +Reg Opcode: ldXZ r1=[r3],r2
		 *
		 * Note that we update r3 even in the case of ldfX.a
		 * (where the load does not happen)
		 *
		 * The way the load algorithm works, we know that r3 does not
		 * have its NaT bit set (would have gotten NaT consumption
		 * before getting the unaligned fault). So we can use ifa
		 * which equals r3 at this point.
		 *
		 * IMPORTANT:
		 * The above statement holds ONLY because we know that we
		 * never reach this code when trying to do a ldX.s.
		 * If we ever make it to here on an ldfX.s then
		 */
		getreg(ld.imm, &r2, &nat_r2, regs);

		ifa += r2;

		/*
		 * propagate NaT r2 -> r3
		 */
		setreg(ld.r3, ifa, nat_r2, regs);

		DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n", ld.imm, r2, ifa, nat_r2);
	}
}

static int
emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
	unsigned int len = 1 << ld.x6_sz;

	/*
	 * r0, as target, doesn't need to be checked because Illegal Instruction
	 * faults have higher priority than unaligned faults.
	 *
	 * r0 cannot be found as the base as it would never generate an
	 * unaligned reference.
	 */

	/*
	 * ldX.a: we don't try to emulate anything but we must invalidate the ALAT entry.
	 * See comment below for explanation on how we handle ldX.a
	 */
	if (ld.x6_op != 0x2) {
		unsigned long val = 0;

		if (len != 2 && len != 4 && len != 8) {
			DPRINT("unknown size: x6=%d\n", ld.x6_sz);
			return -1;
		}
		/* this assumes little-endian byte-order: */
		if (copy_from_user(&val, (void *) ifa, len))
			return -1;
		setreg(ld.r1, val, 0, regs);
	}

	/*
	 * check for updates on any kind of loads
	 */
	if (ld.op == 0x5 || ld.m)
		emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE : UPD_REG, ld, regs, ifa);

	/*
	 * handling of various loads (based on EAS2.4):
	 *
	 * ldX.acq (ordered load):
	 *	- acquire semantics would have been used, so force fence instead.
	 *
	 * ldX.c.clr (check load and clear):
	 *	- if we get to this handler, it's because the entry was not in the ALAT.
	 *	  Therefore the operation reverts to a normal load
	 *
	 * ldX.c.nc (check load no clear):
	 *	- same as previous one
	 *
	 * ldX.c.clr.acq (ordered check load and clear):
	 *	- same as above for the c.clr part. The load needs to have acquire semantics. So
	 *	  we use the fence semantics which is stronger and thus ensures correctness.
	 *
	 * ldX.a (advanced load):
	 *	- suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
	 *	  address doesn't match the requested size alignment. This means that we would
	 *	  possibly need more than one load to get the result.
	 *
	 *	  The load part can be handled just like a normal load, however the difficult
	 *	  part is to get the right thing into the ALAT. The critical piece of information
	 *	  is the base address of the load & size. To do that, a ld.a must be executed;
	 *	  clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
	 *	  if we use the same target register, we will be okay for the check.a instruction.
	 *	  If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
	 *	  which would overlap within [r3,r3+X] (the size of the load was stored in the
	 *	  ALAT). If such an entry is found the entry is invalidated. But this is not good
	 *	  enough, take the following example:
	 *		r3=3
	 *		ld4.a r1=[r3]
	 *
	 *	  Could be emulated by doing:
	 *		ld1.a r1=[r3],1
	 *		store to temporary;
	 *		ld1.a r1=[r3],1
	 *		store & shift to temporary;
	 *		ld1.a r1=[r3],1
	 *		store & shift to temporary;
	 *		ld1.a r1=[r3]
	 *		store & shift to temporary;
	 *		r1=temporary
	 *
	 *	  So in this case, you would get the right value in r1 but the wrong info in
	 *	  the ALAT.  Notice that you could do it in reverse to finish with address 3
	 *	  but you would still get the size wrong.  To get the size right, one needs to
	 *	  execute exactly the same kind of load. You could do it from an aligned
	 *	  temporary location, but you would get the address wrong.
	 *
	 *	  So no matter what, it is not possible to emulate an advanced load
	 *	  correctly. But is that really critical ?
	 *
	 *	  Now one has to look at how ld.a is used: one must either do a ld.c.* or
	 *	  chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
	 *	  entry found in ALAT), and that's perfectly ok because:
	 *
	 *		- ld.c.*: if the entry is not present, a normal load is executed
	 *		- chk.a.*: if the entry is not present, execution jumps to recovery code
	 *
	 *	  In either case, the load can be potentially retried in another form.
	 *
	 *	  So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
	 *	  must be invalidated for the register (so that chk.a.*/ld.c.* don't pick up
	 *	  a stale entry later). The register base update MUST also be performed.
	 *
	 *	  Now what is the content of the register and its NaT bit in the case we don't
	 *	  do the load ?  EAS2.4 says (in case an actual load is needed):
	 *
	 *		- r1 = [r3], NaT = 0 if it succeeds
	 *		- r1 = 0, NaT = 0 if trying to access non-speculative memory
	 *
	 *	  For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
	 *	  retry and thus eventually reload the register, thereby changing NaT and
	 *	  register content.
	 */

	/*
	 * when the load has the .acq completer then
	 * use ordering fence.
	 */
	if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
		mb();

	/*
	 * invalidate ALAT entry in case of advanced load
	 */
	if (ld.x6_op == 0x2)
		invala_gr(ld.r1);

	return 0;
}

static int
emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
	unsigned long r2;
	unsigned int len = 1 << ld.x6_sz;

	/*
	 * if we get to this handler, NaT bits on both r3 and r2 have already
	 * been checked, so we don't need to do it
	 *
	 * extract the value to be stored
	 */
	getreg(ld.imm, &r2, 0, regs);

	/*
	 * we rely on the macros in unaligned.h for now, i.e.
	 * we let the compiler figure out how to read memory gracefully.
	 *
	 * We need this switch/case because of the way the inline function
	 * works. The code is optimized by the compiler and looks like
	 * a single switch/case.
	 */
	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);

	if (len != 2 && len != 4 && len != 8) {
		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
		return -1;
	}

	/* this assumes little-endian byte-order: */
	if (copy_to_user((void *) ifa, &r2, len))
		return -1;

	/*
	 * stX [r3]=r2,imm(9)
	 *
	 * NOTE:
	 * ld.r3 can never be r0, because r0 would not generate an
	 * unaligned access.
	 */
	if (ld.op == 0x5) {
		unsigned long imm;

		/*
		 * form imm9: [12:6] contain the first 7 bits
		 */
		imm = ld.x << 7 | ld.r1;
		/*
		 * sign extend (8 bits) if m set
		 */
		if (ld.m) imm |= SIGN_EXT9;
		/*
		 * ifa == r3 (NaT is necessarily cleared)
		 */
		ifa += imm;

		DPRINT("imm=%lx r3=%lx\n", imm, ifa);

		setreg(ld.r3, ifa, 0, regs);
	}

	/*
	 * we don't have alat_invalidate_multiple() so we need
	 * to do the complete flush :-<<
	 */
	ia64_invala();

	/*
	 * stX.rel: use fence instead of release
	 */
	if (ld.x6_op == 0xd)
		mb();

	return 0;
}

/*
 * floating point operation sizes in bytes
 */
static const unsigned char float_fsz[4]={
	10, /* extended precision (e) */
	8,  /* integer (8)            */
	4,  /* single precision (s)   */
	8   /* double precision (d)   */
};

static inline void
mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6"
			      :: "r"(init), "r"(final) : "f6","memory");
}

static int
emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
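Editor's note: the immediate-update paths in emulate_load_updates() and emulate_store_int() both rebuild a 9-bit post-increment immediate from instruction fields (x supplies bit 7, imm/r1 supply the low 7 bits, m selects sign extension via SIGN_EXT9). A minimal standalone sketch of that arithmetic is below; it is not part of unaligned.c, and the SIGN_EXT9 value is assumed to mirror the definition earlier in the file (not shown on this page).

#include <stdio.h>

/* Assumed to mirror the SIGN_EXT9 definition earlier in unaligned.c. */
#define SIGN_EXT9 0xffffffffffffff00ul

/* Rebuild and widen the post-increment immediate the way the kernel
 * code does: m selects sign extension, x is bit 7, imm7 gives the
 * low seven bits. */
static unsigned long make_imm9(unsigned int m, unsigned int x, unsigned int imm7)
{
	unsigned long imm = (unsigned long)x << 7 | imm7;

	if (m)			/* negative immediate: fill bits 8..63 */
		imm |= SIGN_EXT9;
	return imm;
}

int main(void)
{
	printf("%lx\n", make_imm9(0, 0, 5));	/* +5  -> 5 */
	printf("%lx\n", make_imm9(1, 1, 120));	/* -8  -> fffffffffffffff8 */
	return 0;
}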
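Editor's note: getreg() recovers a register's NaT bit from the UNAT collection by using bits 8..3 of the register's spill address as the bit index, which is what the expression (addr >> 3 & 0x3f) computes. The sketch below illustrates that indexing with a made-up spill address and UNAT word; it is illustrative only and not part of unaligned.c.

#include <stdio.h>

/* Pick the UNAT bit corresponding to an 8-byte spill slot, the same
 * way getreg() does with (addr >> 3 & 0x3f). */
static int nat_from_unat(unsigned long unat, unsigned long spill_addr)
{
	unsigned int bit = (spill_addr >> 3) & 0x3f;	/* bits 8..3 of the address */

	return (unat >> bit) & 0x1;
}

int main(void)
{
	unsigned long unat = 1UL << 5;		/* pretend slot 5 holds a NaT'ed register */
	unsigned long addr = 0x1000 + 5 * 8;	/* hypothetical spill address of that slot */

	printf("nat=%d\n", nat_from_unat(unat, addr));	/* prints nat=1 */
	return 0;
}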

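Editor's note: for the integer paths above, the actual data movement is just a copy_from_user()/copy_to_user() of len bytes through a zeroed 64-bit temporary, which only produces the right register image on a little-endian configuration (hence the "this assumes little-endian byte-order" comments). A user-space sketch of the same idea, with memcpy standing in for the user-copy helpers, is below; the names and addresses are illustrative, not from the kernel.

#include <stdio.h>
#include <string.h>

/* User-space analogue of the emulate_load_int()/emulate_store_int()
 * data movement: move `len` bytes to/from an unaligned address via a
 * zeroed 64-bit temporary. Little-endian only, like the kernel code. */
static unsigned long emulated_load(const void *unaligned_src, unsigned int len)
{
	unsigned long val = 0;

	memcpy(&val, unaligned_src, len);	/* stands in for copy_from_user() */
	return val;
}

static void emulated_store(void *unaligned_dst, unsigned long val, unsigned int len)
{
	memcpy(unaligned_dst, &val, len);	/* stands in for copy_to_user() */
}

int main(void)
{
	unsigned char buf[16] = { 0 };

	emulated_store(buf + 3, 0x11223344, 4);		/* a "st4" at an odd offset */
	printf("%lx\n", emulated_load(buf + 3, 4));	/* prints 11223344 */
	return 0;
}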