📄 traps.c
字号:
} else read_descriptor(data_sel, v, regs, &data_base, &data_limit, &ar, 0); data_limit = ~0UL; ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P; }#endif port = (u16)regs->edx; continue_io_string: switch ( opcode ) { case 0x6c: /* INSB */ op_bytes = 1; case 0x6d: /* INSW/INSL */ if ( (data_limit < (op_bytes - 1)) || (rd_ad(edi) > (data_limit - (op_bytes - 1))) || !guest_io_okay(port, op_bytes, v, regs) ) goto fail; data = guest_io_read(port, op_bytes, v, regs); if ( (rc = copy_to_user((void *)data_base + rd_ad(edi), &data, op_bytes)) != 0 ) { propagate_page_fault(data_base + rd_ad(edi) + op_bytes - rc, PFEC_write_access); return EXCRET_fault_fixed; } wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes)); break; case 0x6e: /* OUTSB */ op_bytes = 1; case 0x6f: /* OUTSW/OUTSL */ if ( (data_limit < (op_bytes - 1)) || (rd_ad(esi) > (data_limit - (op_bytes - 1))) || !guest_io_okay(port, op_bytes, v, regs) ) goto fail; if ( (rc = copy_from_user(&data, (void *)data_base + rd_ad(esi), op_bytes)) != 0 ) { propagate_page_fault(data_base + rd_ad(esi) + op_bytes - rc, 0); return EXCRET_fault_fixed; } guest_io_write(port, op_bytes, data, v, regs); wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes)); break; } bpmatch = check_guest_io_breakpoint(v, port, op_bytes); if ( rep_prefix && (wr_ad(ecx, regs->ecx - 1) != 0) ) { if ( !bpmatch && !hypercall_preempt_check() ) goto continue_io_string; eip = regs->eip; } goto done; } /* * Very likely to be an I/O instruction (IN/OUT). * Build an on-stack stub to execute the instruction with full guest * GPR context. This is needed for some systems which (ab)use IN/OUT * to communicate with BIOS code in system-management mode. */#ifdef __x86_64__ /* movq $host_to_guest_gpr_switch,%rcx */ io_emul_stub[0] = 0x48; io_emul_stub[1] = 0xb9; *(void **)&io_emul_stub[2] = (void *)host_to_guest_gpr_switch; /* callq *%rcx */ io_emul_stub[10] = 0xff; io_emul_stub[11] = 0xd1;#else /* call host_to_guest_gpr_switch */ io_emul_stub[0] = 0xe8; *(s32 *)&io_emul_stub[1] = (char *)host_to_guest_gpr_switch - &io_emul_stub[5]; /* 7 x nop */ memset(&io_emul_stub[5], 0x90, 7);#endif /* data16 or nop */ io_emul_stub[12] = (op_bytes != 2) ? 0x90 : 0x66; /* <io-access opcode> */ io_emul_stub[13] = opcode; /* imm8 or nop */ io_emul_stub[14] = 0x90; /* ret (jumps to guest_to_host_gpr_switch) */ io_emul_stub[15] = 0xc3; /* Handy function-typed pointer to the stub. */ io_emul = (void *)io_emul_stub; if ( ioemul_handle_quirk ) ioemul_handle_quirk(opcode, &io_emul_stub[12], regs); /* I/O Port and Interrupt Flag instructions. */ switch ( opcode ) { case 0xe4: /* IN imm8,%al */ op_bytes = 1; case 0xe5: /* IN imm8,%eax */ port = insn_fetch(u8, code_base, eip, code_limit); io_emul_stub[14] = port; /* imm8 */ exec_in: if ( !guest_io_okay(port, op_bytes, v, regs) ) goto fail; if ( admin_io_okay(port, op_bytes, v, regs) ) { io_emul(regs); } else { if ( op_bytes == 4 ) regs->eax = 0; else regs->eax &= ~((1u << (op_bytes * 8)) - 1); regs->eax |= guest_io_read(port, op_bytes, v, regs); } bpmatch = check_guest_io_breakpoint(v, port, op_bytes); goto done; case 0xec: /* IN %dx,%al */ op_bytes = 1; case 0xed: /* IN %dx,%eax */ port = (u16)regs->edx; goto exec_in; case 0xe6: /* OUT %al,imm8 */ op_bytes = 1; case 0xe7: /* OUT %eax,imm8 */ port = insn_fetch(u8, code_base, eip, code_limit); io_emul_stub[14] = port; /* imm8 */ exec_out: if ( !guest_io_okay(port, op_bytes, v, regs) ) goto fail; if ( admin_io_okay(port, op_bytes, v, regs) ) { io_emul(regs); if ( (op_bytes == 1) && pv_post_outb_hook ) pv_post_outb_hook(port, regs->eax); } else { guest_io_write(port, op_bytes, regs->eax, v, regs); } bpmatch = check_guest_io_breakpoint(v, port, op_bytes); goto done; case 0xee: /* OUT %al,%dx */ op_bytes = 1; case 0xef: /* OUT %eax,%dx */ port = (u16)regs->edx; goto exec_out; case 0xfa: /* CLI */ case 0xfb: /* STI */ if ( v->arch.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) ) goto fail; /* * This is just too dangerous to allow, in my opinion. Consider if the * caller then tries to reenable interrupts using POPF: we can't trap * that and we'll end up with hard-to-debug lockups. Fast & loose will * do for us. :-) */ /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/ goto done; } /* No decode of this single-byte opcode. */ goto fail; twobyte_opcode: /* Two-byte opcodes only emulated from guest kernel. */ if ( !guest_kernel_mode(v, regs) ) goto fail; /* Privileged (ring 0) instructions. */ opcode = insn_fetch(u8, code_base, eip, code_limit); if ( lock && (opcode & ~3) != 0x20 ) goto fail; switch ( opcode ) { case 0x06: /* CLTS */ (void)do_fpu_taskswitch(0); break; case 0x09: /* WBINVD */ /* Ignore the instruction if unprivileged. */ if ( !cache_flush_permitted(v->domain) ) /* Non-physdev domain attempted WBINVD; ignore for now since newer linux uses this in some start-of-day timing loops */ ; else wbinvd(); break; case 0x20: /* MOV CR?,<reg> */ opcode = insn_fetch(u8, code_base, eip, code_limit); if ( opcode < 0xc0 ) goto fail; modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); switch ( modrm_reg ) { case 0: /* Read CR0 */ *reg = (read_cr0() & ~X86_CR0_TS) | v->arch.guest_context.ctrlreg[0]; break; case 2: /* Read CR2 */ *reg = v->arch.guest_context.ctrlreg[2]; break; case 3: /* Read CR3 */ if ( !is_pv_32on64_vcpu(v) ) *reg = xen_pfn_to_cr3(mfn_to_gmfn( v->domain, pagetable_get_pfn(v->arch.guest_table)));#ifdef CONFIG_COMPAT else *reg = compat_pfn_to_cr3(mfn_to_gmfn( v->domain, l4e_get_pfn(*(l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)))));#endif break; case 4: /* Read CR4 */ /* * Guests can read CR4 to see what features Xen has enabled. We * therefore lie about PGE & PSE as they are unavailable to guests. */ *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE); break; default: goto fail; } break; case 0x21: /* MOV DR?,<reg> */ opcode = insn_fetch(u8, code_base, eip, code_limit); if ( opcode < 0xc0 ) goto fail; modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 ) goto fail; *reg = res; break; case 0x22: /* MOV <reg>,CR? */ opcode = insn_fetch(u8, code_base, eip, code_limit); if ( opcode < 0xc0 ) goto fail; modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); switch ( modrm_reg ) { case 0: /* Write CR0 */ if ( (*reg ^ read_cr0()) & ~X86_CR0_TS ) { gdprintk(XENLOG_WARNING, "Attempt to change unmodifiable CR0 flags.\n"); goto fail; } (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS)); break; case 2: /* Write CR2 */ v->arch.guest_context.ctrlreg[2] = *reg; arch_set_cr2(v, *reg); break; case 3: /* Write CR3 */ domain_lock(v->domain); if ( !is_pv_32on64_vcpu(v) ) rc = new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));#ifdef CONFIG_COMPAT else rc = new_guest_cr3(gmfn_to_mfn(v->domain, compat_cr3_to_pfn(*reg)));#endif domain_unlock(v->domain); if ( rc == 0 ) /* not okay */ goto fail; break; case 4: /* Write CR4 */ v->arch.guest_context.ctrlreg[4] = pv_guest_cr4_fixup(*reg); write_cr4(pv_guest_cr4_to_real_cr4( v->arch.guest_context.ctrlreg[4])); break; default: goto fail; } break; case 0x23: /* MOV <reg>,DR? */ opcode = insn_fetch(u8, code_base, eip, code_limit); if ( opcode < 0xc0 ) goto fail; modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); if ( do_set_debugreg(modrm_reg, *reg) != 0 ) goto fail; break; case 0x30: /* WRMSR */ eax = regs->eax; edx = regs->edx; res = ((u64)edx << 32) | eax; switch ( (u32)regs->ecx ) {#ifdef CONFIG_X86_64 case MSR_FS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; if ( wrmsr_safe(MSR_FS_BASE, eax, edx) ) goto fail; v->arch.guest_context.fs_base = res; break; case MSR_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; if ( wrmsr_safe(MSR_GS_BASE, eax, edx) ) goto fail; v->arch.guest_context.gs_base_kernel = res; break; case MSR_SHADOW_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; if ( wrmsr_safe(MSR_SHADOW_GS_BASE, eax, edx) ) goto fail; v->arch.guest_context.gs_base_user = res; break;#endif case MSR_K7_FID_VID_STATUS: case MSR_K7_FID_VID_CTL: case MSR_K8_PSTATE_LIMIT: case MSR_K8_PSTATE_CTRL: case MSR_K8_PSTATE_STATUS: case MSR_K8_PSTATE0: case MSR_K8_PSTATE1: case MSR_K8_PSTATE2: case MSR_K8_PSTATE3: case MSR_K8_PSTATE4: case MSR_K8_PSTATE5: case MSR_K8_PSTATE6: case MSR_K8_PSTATE7: if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ) goto fail; if ( cpufreq_controller != FREQCTL_dom0_kernel ) break; if ( wrmsr_safe(regs->ecx, eax, edx) != 0 ) goto fail; break; case MSR_IA32_PERF_CTL: if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) goto fail; if ( cpufreq_controller != FREQCTL_dom0_kernel ) break; if ( wrmsr_safe(regs->ecx, eax, edx) != 0 ) goto fail; break; default: if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) ) break; if ( (rdmsr_safe(regs->ecx, l, h) != 0) || (eax != l) || (edx != h) ) gdprintk(XENLOG_WARNING, "Domain attempted WRMSR %p from " "%08x:%08x to %08x:%08x.\n", _p(regs->ecx), h, l, edx, eax); break; } break; case 0x31: /* RDTSC */ rdtsc(regs->eax, regs->edx); break; case 0x32: /* RDMSR */ switch ( (u32)regs->ecx ) {#ifdef CONFIG_X86_64 case MSR_FS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; regs->eax = v->arch.guest_context.fs_base & 0xFFFFFFFFUL; regs->edx = v->arch.guest_context.fs_base >> 32; break; case MSR_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; regs->eax = v->arch.guest_context.gs_base_kernel & 0xFFFFFFFFUL; regs->edx = v->arch.guest_context.gs_base_kernel >> 32; break; case MSR_SHADOW_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL; regs->edx = v->arch.guest_context.gs_base_user >> 32; break;#endif case MSR_K7_FID_VID_CTL: case MSR_K7_FID_VID_STATUS: case MSR_K8_PSTATE_LIMIT: case MSR_K8_PSTATE_CTRL: case MSR_K8_PSTATE_STATUS: case MSR_K8_PSTATE0: case MSR_K8_PSTATE1: case MSR_K8_PSTATE2: case MSR_K8_PSTATE3: case MSR_K8_PSTATE4: case MSR_K8_PSTATE5: case MSR_K8_PSTATE6: case MSR_K8_PSTATE7: if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ) goto fail; if ( cpufreq_controller != FREQCTL_dom0_kernel ) { regs->eax = regs->edx = 0; break; } if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) != 0 )
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -