traps.c
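/*
 * Added orientation comment (not part of the original file): this excerpt
 * appears to come from Xen's xen/arch/x86/traps.c. It shows the tail of the
 * privileged-instruction emulator for PV guests (port I/O, CLI/STI, CLTS,
 * WBINVD, MOV to/from CR?/DR?, WRMSR/RDMSR, RDTSC), its done/fail exit
 * paths, and the beginning of the call-gate emulator emulate_gate_op().
 */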
        case 2:
            if ( guest_inw_okay(port, v, regs) )
                io_emul(regs);
            else
                regs->eax |= (u16)~0;
            break;
        case 4:
            if ( guest_inl_okay(port, v, regs) )
                io_emul(regs);
            else
                regs->eax = (u32)~0;
            break;
        }
        bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
        goto done;

    case 0xec: /* IN %dx,%al */
        op_bytes = 1;
    case 0xed: /* IN %dx,%eax */
        port = (u16)regs->edx;
        goto exec_in;

    case 0xe6: /* OUT %al,imm8 */
        op_bytes = 1;
    case 0xe7: /* OUT %eax,imm8 */
        port = insn_fetch(u8, code_base, eip, code_limit);
        io_emul_stub[14] = port; /* imm8 */
    exec_out:
        if ( !guest_io_okay(port, op_bytes, v, regs) )
            goto fail;
        switch ( op_bytes )
        {
        case 1:
            if ( guest_outb_okay(port, v, regs) )
            {
                io_emul(regs);
                if ( pv_post_outb_hook )
                    pv_post_outb_hook(port, regs->eax);
            }
            else if ( port == 0x42 || port == 0x43 || port == 0x61 )
                pv_pit_handler(port, regs->eax, 1);
            break;
        case 2:
            if ( guest_outw_okay(port, v, regs) )
                io_emul(regs);
            break;
        case 4:
            if ( guest_outl_okay(port, v, regs) )
                io_emul(regs);
            break;
        }
        bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
        goto done;

    case 0xee: /* OUT %al,%dx */
        op_bytes = 1;
    case 0xef: /* OUT %eax,%dx */
        port = (u16)regs->edx;
        goto exec_out;

    case 0xfa: /* CLI */
    case 0xfb: /* STI */
        if ( v->arch.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
            goto fail;
        /*
         * This is just too dangerous to allow, in my opinion. Consider if the
         * caller then tries to reenable interrupts using POPF: we can't trap
         * that and we'll end up with hard-to-debug lockups. Fast & loose will
         * do for us. :-)
         */
        /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/
        goto done;
    }

    /* No decode of this single-byte opcode. */
    goto fail;

 twobyte_opcode:
    /* Two-byte opcodes only emulated from guest kernel. */
    if ( !guest_kernel_mode(v, regs) )
        goto fail;

    /* Privileged (ring 0) instructions. */
    opcode = insn_fetch(u8, code_base, eip, code_limit);
    if ( lock && (opcode & ~3) != 0x20 )
        goto fail;
    switch ( opcode )
    {
    case 0x06: /* CLTS */
        (void)do_fpu_taskswitch(0);
        break;

    case 0x09: /* WBINVD */
        /* Ignore the instruction if unprivileged. */
        if ( !cache_flush_permitted(v->domain) )
            /* Non-physdev domain attempted WBINVD; ignore for now since
               newer linux uses this in some start-of-day timing loops */
            ;
        else
            wbinvd();
        break;

    case 0x20: /* MOV CR?,<reg> */
        opcode = insn_fetch(u8, code_base, eip, code_limit);
        modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
        modrm_rm  |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        switch ( modrm_reg )
        {
        case 0: /* Read CR0 */
            *reg = (read_cr0() & ~X86_CR0_TS) |
                v->arch.guest_context.ctrlreg[0];
            break;

        case 2: /* Read CR2 */
            *reg = v->arch.guest_context.ctrlreg[2];
            break;

        case 3: /* Read CR3 */
            if ( !is_pv_32on64_vcpu(v) )
                *reg = xen_pfn_to_cr3(mfn_to_gmfn(
                    v->domain, pagetable_get_pfn(v->arch.guest_table)));
#ifdef CONFIG_COMPAT
            else
                *reg = compat_pfn_to_cr3(mfn_to_gmfn(
                    v->domain, l4e_get_pfn(*(l4_pgentry_t *)__va(
                        pagetable_get_paddr(v->arch.guest_table)))));
#endif
            break;

        case 4: /* Read CR4 */
            /*
             * Guests can read CR4 to see what features Xen has enabled. We
             * therefore lie about PGE & PSE as they are unavailable to guests.
             */
            *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
            break;

        default:
            goto fail;
        }
        break;

    case 0x21: /* MOV DR?,<reg> */
        opcode = insn_fetch(u8, code_base, eip, code_limit);
        modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
        modrm_rm  |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 )
            goto fail;
        *reg = res;
        break;
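    /*
     * Editorial note (not from the original source): cases 0x20-0x23 share
     * one ModRM decode -- bits 5:3 of the ModRM byte select the control or
     * debug register number (a LOCK prefix adds 8; for MOV CR this is the
     * AMD convention for reaching CR8 from 32-bit code), while bits 2:0
     * select the general-purpose operand looked up via decode_register().
     */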
    case 0x22: /* MOV <reg>,CR? */
        opcode = insn_fetch(u8, code_base, eip, code_limit);
        modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
        modrm_rm  |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        switch ( modrm_reg )
        {
        case 0: /* Write CR0 */
            if ( (*reg ^ read_cr0()) & ~X86_CR0_TS )
            {
                gdprintk(XENLOG_WARNING,
                         "Attempt to change unmodifiable CR0 flags.\n");
                goto fail;
            }
            (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
            break;

        case 2: /* Write CR2 */
            v->arch.guest_context.ctrlreg[2] = *reg;
            arch_set_cr2(v, *reg);
            break;

        case 3: /* Write CR3 */
            LOCK_BIGLOCK(v->domain);
            if ( !is_pv_32on64_vcpu(v) )
                rc = new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
#ifdef CONFIG_COMPAT
            else
                rc = new_guest_cr3(gmfn_to_mfn(v->domain, compat_cr3_to_pfn(*reg)));
#endif
            UNLOCK_BIGLOCK(v->domain);
            if ( rc == 0 ) /* not okay */
                goto fail;
            break;

        case 4: /* Write CR4 */
            v->arch.guest_context.ctrlreg[4] = pv_guest_cr4_fixup(*reg);
            write_cr4(pv_guest_cr4_to_real_cr4(
                v->arch.guest_context.ctrlreg[4]));
            break;

        default:
            goto fail;
        }
        break;

    case 0x23: /* MOV <reg>,DR? */
        opcode = insn_fetch(u8, code_base, eip, code_limit);
        modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
        modrm_rm  |= (opcode >> 0) & 7;
        reg = decode_register(modrm_rm, regs, 0);
        if ( do_set_debugreg(modrm_reg, *reg) != 0 )
            goto fail;
        break;

    case 0x30: /* WRMSR */
        eax = regs->eax;
        edx = regs->edx;
        res = ((u64)edx << 32) | eax;
        switch ( (u32)regs->ecx )
        {
#ifdef CONFIG_X86_64
        case MSR_FS_BASE:
            if ( is_pv_32on64_vcpu(v) )
                goto fail;
            if ( wrmsr_safe(MSR_FS_BASE, eax, edx) )
                goto fail;
            v->arch.guest_context.fs_base = res;
            break;
        case MSR_GS_BASE:
            if ( is_pv_32on64_vcpu(v) )
                goto fail;
            if ( wrmsr_safe(MSR_GS_BASE, eax, edx) )
                goto fail;
            v->arch.guest_context.gs_base_kernel = res;
            break;
        case MSR_SHADOW_GS_BASE:
            if ( is_pv_32on64_vcpu(v) )
                goto fail;
            if ( wrmsr_safe(MSR_SHADOW_GS_BASE, eax, edx) )
                goto fail;
            v->arch.guest_context.gs_base_user = res;
            break;
#endif
        case MSR_K7_FID_VID_STATUS:
        case MSR_K7_FID_VID_CTL:
        case MSR_K8_PSTATE_LIMIT:
        case MSR_K8_PSTATE_CTRL:
        case MSR_K8_PSTATE_STATUS:
        case MSR_K8_PSTATE0:
        case MSR_K8_PSTATE1:
        case MSR_K8_PSTATE2:
        case MSR_K8_PSTATE3:
        case MSR_K8_PSTATE4:
        case MSR_K8_PSTATE5:
        case MSR_K8_PSTATE6:
        case MSR_K8_PSTATE7:
            if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
                goto fail;
            if ( cpufreq_controller != FREQCTL_dom0_kernel )
                break;
            if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
                goto fail;
            break;
        case MSR_IA32_PERF_CTL:
            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
                goto fail;
            if ( cpufreq_controller != FREQCTL_dom0_kernel )
                break;
            if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
                goto fail;
            break;
        default:
            if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) )
                break;
            if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
                 (eax != l) || (edx != h) )
                gdprintk(XENLOG_WARNING, "Domain attempted WRMSR %p from "
                         "%08x:%08x to %08x:%08x.\n",
                         _p(regs->ecx), h, l, edx, eax);
            break;
        }
        break;

    case 0x31: /* RDTSC */
        rdtsc(regs->eax, regs->edx);
        break;

    case 0x32: /* RDMSR */
        switch ( (u32)regs->ecx )
        {
#ifdef CONFIG_X86_64
        case MSR_FS_BASE:
            if ( is_pv_32on64_vcpu(v) )
                goto fail;
            regs->eax = v->arch.guest_context.fs_base & 0xFFFFFFFFUL;
            regs->edx = v->arch.guest_context.fs_base >> 32;
            break;
        case MSR_GS_BASE:
            if ( is_pv_32on64_vcpu(v) )
                goto fail;
            regs->eax = v->arch.guest_context.gs_base_kernel & 0xFFFFFFFFUL;
            regs->edx = v->arch.guest_context.gs_base_kernel >> 32;
            break;
        case MSR_SHADOW_GS_BASE:
            if ( is_pv_32on64_vcpu(v) )
                goto fail;
            regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL;
            regs->edx = v->arch.guest_context.gs_base_user >> 32;
            break;
#endif
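        /*
         * Editorial note (not from the original source): the AMD frequency
         * and P-state MSRs below are only read through to the hardware when
         * dom0's kernel acts as the cpufreq controller; otherwise they read
         * back as zero.
         */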
        case MSR_K7_FID_VID_CTL:
        case MSR_K7_FID_VID_STATUS:
        case MSR_K8_PSTATE_LIMIT:
        case MSR_K8_PSTATE_CTRL:
        case MSR_K8_PSTATE_STATUS:
        case MSR_K8_PSTATE0:
        case MSR_K8_PSTATE1:
        case MSR_K8_PSTATE2:
        case MSR_K8_PSTATE3:
        case MSR_K8_PSTATE4:
        case MSR_K8_PSTATE5:
        case MSR_K8_PSTATE6:
        case MSR_K8_PSTATE7:
            if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
                goto fail;
            if ( cpufreq_controller != FREQCTL_dom0_kernel )
            {
                regs->eax = regs->edx = 0;
                break;
            }
            if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) != 0 )
                goto fail;
            break;
        case MSR_EFER:
            if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
                goto fail;
            break;
        case MSR_IA32_MISC_ENABLE:
            if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
                goto fail;
            regs->eax &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
                           MSR_IA32_MISC_ENABLE_MONITOR_ENABLE);
            regs->eax |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
                         MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
                         MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
            break;
        default:
            if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
            {
                regs->eax = l;
                regs->edx = h;
                break;
            }
            /* Everyone can read the MSR space. */
            /* gdprintk(XENLOG_WARNING,"Domain attempted RDMSR %p.\n",
                        _p(regs->ecx));*/
            if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
                goto fail;
            break;
        }
        break;

    default:
        goto fail;
    }

#undef wr_ad
#undef rd_ad

 done:
    instruction_done(regs, eip, bpmatch);
 skip:
    return EXCRET_fault_fixed;

 fail:
    return 0;
}

static inline int check_stack_limit(unsigned int ar, unsigned int limit,
                                    unsigned int esp, unsigned int decr)
{
    return (((esp - decr) < (esp - 1)) &&
            (!(ar & _SEGMENT_EC) ? (esp - 1) <= limit
                                 : (esp - decr) > limit));
}

static void emulate_gate_op(struct cpu_user_regs *regs)
{
#ifdef __x86_64__
    struct vcpu *v = current;
    unsigned int sel, ar, dpl, nparm, opnd_sel;
    unsigned int op_default, op_bytes, ad_default, ad_bytes;
    unsigned long off, eip, opnd_off, base, limit;
    int jump;

    /* Check whether this fault is due to the use of a call gate. */
    if ( !read_gate_descriptor(regs->error_code, v, &sel, &off, &ar) ||
         (((ar >> 13) & 3) < (regs->cs & 3)) ||
         ((ar & _SEGMENT_TYPE) != 0xc00) )
    {
        do_guest_trap(TRAP_gp_fault, regs, 1);
        return;
    }
    if ( !(ar & _SEGMENT_P) )
    {
        do_guest_trap(TRAP_no_segment, regs, 1);
        return;
    }
    dpl = (ar >> 13) & 3;
    nparm = ar & 0x1f;

    /*
     * Decode instruction (and perhaps operand) to determine RPL,
     * whether this is a jump or a call, and the call return offset.
     */
    if ( !read_descriptor(regs->cs, v, regs, &base, &limit, &ar, 0) ||
         !(ar & _SEGMENT_S) ||
         !(ar & _SEGMENT_P) ||
         !(ar & _SEGMENT_CODE) )
    {
        do_guest_trap(TRAP_gp_fault, regs, 1);
        return;
    }

    op_bytes = op_default = ar & _SEGMENT_DB ? 4 : 2;
    ad_default = ad_bytes = op_default;
    opnd_sel = opnd_off = 0;
    jump = -1;
    for ( eip = regs->eip; eip - regs->_eip < 10; )
    {
        switch ( insn_fetch(u8, base, eip, limit) )
        {
        case 0x66: /* operand-size override */
            op_bytes = op_default ^ 6; /* switch between 2/4 bytes */
            continue;
        case 0x67: /* address-size override */
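/*
 * Supplementary sketch (added commentary, not part of the original file):
 * the WRMSR/RDMSR cases above pass 64-bit MSR values through the EDX:EAX
 * register pair -- EAX carries the low 32 bits and EDX the high 32 bits,
 * as in "res = ((u64)edx << 32) | eax" and "regs->edx = ... >> 32" above.
 * The helper names msr_join()/msr_split() below are hypothetical and do
 * not exist in Xen; they only illustrate that packing.
 */
static inline unsigned long long msr_join(unsigned int eax, unsigned int edx)
{
    /* Combine the EDX:EAX pair into the 64-bit value a WRMSR writes. */
    return ((unsigned long long)edx << 32) | eax;
}

static inline void msr_split(unsigned long long val,
                             unsigned int *eax, unsigned int *edx)
{
    /* Split a 64-bit MSR value into the EDX:EAX pair a RDMSR returns. */
    *eax = (unsigned int)(val & 0xffffffffULL);
    *edx = (unsigned int)(val >> 32);
}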