📄 traps.c
字号:
    /* NOTE(review): tail of a handler whose start is outside this chunk
     * (dispatches to the registered machine-check vector) — do not edit
     * in isolation. */
    machine_check_vector(regs, regs->error_code);
}

/*
 * Reflect a page fault into the current PV guest: record the faulting
 * address in the guest's virtual %cr2, recompute the error code's
 * user-mode bit for the guest's point of view, and arm a trap bounce to
 * the guest's registered #PF handler. If the guest has no usable #PF
 * handler, log the fault and dump a page walk instead.
 */
void propagate_page_fault(unsigned long addr, u16 error_code)
{
    struct trap_info *ti;
    struct vcpu *v = current;
    struct trap_bounce *tb = &v->arch.trap_bounce;

    /* Make the fault address visible to the guest via its virtual %cr2. */
    v->arch.guest_context.ctrlreg[2] = addr;
    arch_set_cr2(v, addr);

    /* Re-set error_code.user flag appropriately for the guest. */
    error_code &= ~PFEC_user_mode;
    if ( !guest_kernel_mode(v, guest_cpu_user_regs()) )
        error_code |= PFEC_user_mode;

    trace_pv_page_fault(addr, error_code);

    /* Bounce the exception (with error code) to the guest's #PF entry. */
    ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
    tb->error_code = error_code;
    tb->cs = ti->cs;
    tb->eip = ti->address;
    if ( TI_GET_IF(ti) )
        tb->flags |= TBF_INTERRUPT;
    if ( unlikely(null_trap_bounce(v, tb)) )
    {
        /* Guest registered no (or a null) handler: report and walk. */
        printk("Unhandled page fault in domain %d on VCPU %d (ec=%04X)\n",
               v->domain->domain_id, v->vcpu_id, error_code);
        show_page_walk(addr);
    }
}

/*
 * Handle a fault in the per-VCPU GDT/LDT mapping area.
 * @offset: byte offset of the fault within the whole GDT/LDT virtual range.
 * Returns 0 to let the normal #PF path continue, or EXCRET_fault_fixed
 * once the fault has been resolved (or reflected to the guest).
 */
static int handle_gdt_ldt_mapping_fault(
    unsigned long offset, struct cpu_user_regs *regs)
{
    struct vcpu *curr = current;
    /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
    unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
    unsigned int vcpu_area   = (offset >> GDT_LDT_VCPU_VA_SHIFT);

    /* Should never fault in another vcpu's area. */
    BUG_ON(vcpu_area != curr->vcpu_id);

    /* Byte offset within the gdt/ldt sub-area. */
    offset &= (1UL << (GDT_LDT_VCPU_VA_SHIFT-1)) - 1UL;

    if ( likely(is_ldt_area) )
    {
        /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
        if ( likely(map_ldt_shadow_page(offset >> PAGE_SHIFT)) )
        {
            if ( guest_mode(regs) )
                trace_trap_two_addr(TRC_PV_GDT_LDT_MAPPING_FAULT,
                                    regs->eip, offset);
        }
        else
        {
            /* In hypervisor mode? Leave it to the #PF handler to fix up. */
            if ( !guest_mode(regs) )
                return 0;
            /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
            propagate_page_fault(
                curr->arch.guest_context.ldt_base + offset, regs->error_code);
        }
    }
    else
    {
        /* GDT fault: handle the fault as #GP(selector). */
        regs->error_code = (u16)offset & ~7;
        (void)do_general_protection(regs);
    }

    return EXCRET_fault_fixed;
}

#ifdef HYPERVISOR_VIRT_END
#define IN_HYPERVISOR_RANGE(va) \
    (((va) >= HYPERVISOR_VIRT_START) && ((va) < HYPERVISOR_VIRT_END))
#else
#define IN_HYPERVISOR_RANGE(va) \
    (((va) >= HYPERVISOR_VIRT_START))
#endif

/*
 * Walk the current page tables by hand and return 1 if they already
 * permit the access described by regs->error_code at @addr — i.e. the
 * fault is spurious and may simply be ignored — or 0 otherwise.
 * Must be called with interrupts disabled (see spurious_page_fault()).
 */
static int __spurious_page_fault(
    unsigned long addr, struct cpu_user_regs *regs)
{
    unsigned long mfn, cr3 = read_cr3();
#if CONFIG_PAGING_LEVELS >= 4
    l4_pgentry_t l4e, *l4t;
#endif
#if CONFIG_PAGING_LEVELS >= 3
    l3_pgentry_t l3e, *l3t;
#endif
    l2_pgentry_t l2e, *l2t;
    l1_pgentry_t l1e, *l1t;
    unsigned int required_flags, disallowed_flags;

    /*
     * We do not take spurious page faults in IRQ handlers as we do not
     * modify page tables in IRQ context. We therefore bail here because
     * map_domain_page() is not IRQ-safe.
     */
    if ( in_irq() )
        return 0;

    /* Reserved bit violations are never spurious faults. */
    if ( regs->error_code & PFEC_reserved_bit )
        return 0;

    /* Derive the PTE flags the faulting access would have needed. */
    required_flags = _PAGE_PRESENT;
    if ( regs->error_code & PFEC_write_access )
        required_flags |= _PAGE_RW;
    if ( regs->error_code & PFEC_user_mode )
        required_flags |= _PAGE_USER;

    disallowed_flags = 0;
    if ( regs->error_code & PFEC_insn_fetch )
        disallowed_flags |= _PAGE_NX;

    mfn = cr3 >> PAGE_SHIFT;

#if CONFIG_PAGING_LEVELS >= 4
    l4t = map_domain_page(mfn);
    l4e = l4e_read_atomic(&l4t[l4_table_offset(addr)]);
    mfn = l4e_get_pfn(l4e);
    unmap_domain_page(l4t);
    if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
         (l4e_get_flags(l4e) & disallowed_flags) )
        return 0;
#endif

#if CONFIG_PAGING_LEVELS >= 3
    l3t = map_domain_page(mfn);
#ifdef CONFIG_X86_PAE
    /* PAE: %cr3 points at a 4-entry PDPT within a page; index into it. */
    l3t += (cr3 & 0xFE0UL) >> 3;
#endif
    l3e = l3e_read_atomic(&l3t[l3_table_offset(addr)]);
    mfn = l3e_get_pfn(l3e);
    unmap_domain_page(l3t);
#ifdef CONFIG_X86_PAE
    /* PAE PDPTEs carry only a present bit worth checking here. */
    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
        return 0;
#else
    if ( ((l3e_get_flags(l3e) & required_flags) != required_flags) ||
         (l3e_get_flags(l3e) & disallowed_flags) )
        return 0;
#endif
#endif

    l2t = map_domain_page(mfn);
    l2e = l2e_read_atomic(&l2t[l2_table_offset(addr)]);
    mfn = l2e_get_pfn(l2e);
    unmap_domain_page(l2t);
    if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
         (l2e_get_flags(l2e) & disallowed_flags) )
        return 0;

    /* Superpage mapping: there is no L1 level to walk. */
    if ( l2e_get_flags(l2e) & _PAGE_PSE )
    {
        l1e = l1e_empty(); /* define before use in debug tracing */
        goto spurious;
    }

    l1t = map_domain_page(mfn);
    l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
    mfn = l1e_get_pfn(l1e);
    unmap_domain_page(l1t);
    if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
         (l1e_get_flags(l1e) & disallowed_flags) )
        return 0;

 spurious:
    /* Walk succeeded: access is permitted, so the fault was spurious. */
    dprintk(XENLOG_WARNING, "Spurious fault in domain %u:%u "
            "at addr %lx, e/c %04x\n",
            current->domain->domain_id, current->vcpu_id,
            addr, regs->error_code);
#if CONFIG_PAGING_LEVELS >= 4
    dprintk(XENLOG_WARNING, " l4e = %"PRIpte"\n", l4e_get_intpte(l4e));
#endif
#if CONFIG_PAGING_LEVELS >= 3
    dprintk(XENLOG_WARNING, " l3e = %"PRIpte"\n", l3e_get_intpte(l3e));
#endif
    dprintk(XENLOG_WARNING, " l2e = %"PRIpte"\n", l2e_get_intpte(l2e));
    dprintk(XENLOG_WARNING, " l1e = %"PRIpte"\n", l1e_get_intpte(l1e));
#ifndef NDEBUG
    show_registers(regs);
#endif
    return 1;
}

/*
 * Interrupt-safe wrapper around __spurious_page_fault(): returns non-zero
 * iff the fault at @addr is spurious.
 */
static int spurious_page_fault(
    unsigned long addr, struct cpu_user_regs *regs)
{
    unsigned long flags;
    int is_spurious;

    /*
     * Disabling interrupts prevents TLB flushing, and hence prevents
     * page tables from becoming invalid under our feet during the walk.
     */
    local_irq_save(flags);
    is_spurious = __spurious_page_fault(addr, regs);
    local_irq_restore(flags);

    return is_spurious;
}

/*
 * Try to resolve a page fault without reflecting it to the guest:
 * external-paging faults, GDT/LDT mapping-area faults, writable-pagetable
 * emulation, and paging-mode (e.g. shadow) faults. Returns 0 if no fixup
 * applied, otherwise the paging_fault()/EXCRET_* result.
 */
static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;

    /* No fixups in interrupt context or when interrupts are disabled. */
    if ( in_irq() || !(regs->eflags & X86_EFLAGS_IF) )
        return 0;

    if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
    {
        if ( paging_mode_external(d) && guest_mode(regs) )
        {
            int ret = paging_fault(addr, regs);
            if ( ret == EXCRET_fault_fixed )
                trace_trap_two_addr(TRC_PV_PAGING_FIXUP, regs->eip, addr);
            return ret;
        }
        if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
            return handle_gdt_ldt_mapping_fault(
                addr - GDT_LDT_VIRT_START, regs);
        return 0;
    }

    if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
         guest_kernel_mode(v, regs) &&
         /* Do not check if access-protection fault since the page may
            legitimately be not present in shadow page tables */
         ((regs->error_code & PFEC_write_access) == PFEC_write_access) &&
         ptwr_do_page_fault(v, addr, regs) )
        return EXCRET_fault_fixed;

    if ( paging_mode_enabled(d) )
    {
        int ret = paging_fault(addr, regs);
        if ( ret == EXCRET_fault_fixed )
            trace_trap_two_addr(TRC_PV_PAGING_FIXUP, regs->eip, addr);
        return ret;
    }

    return 0;
}

/*
 * #PF error code:
 * Bit 0: Protection violation (=1) ; Page not present (=0)
 * Bit 1: Write access
 * Bit 2: User mode (=1) ; Supervisor mode (=0)
 * Bit 3: Reserved bit violation
 * Bit 4: Instruction fetch
 */
asmlinkage void do_page_fault(struct cpu_user_regs *regs)
{
    unsigned long addr, fixup;

    addr = read_cr2();

    DEBUGGER_trap_entry(TRAP_page_fault, regs);

    perfc_incr(page_faults);

    /* First give the fixup paths a chance to handle the fault entirely. */
    if ( unlikely(fixup_page_fault(addr, regs) != 0) )
        return;

    if ( unlikely(!guest_mode(regs)) )
    {
        /* Fault in hypervisor context. */
        if ( spurious_page_fault(addr, regs) )
            return;

        /* Known-faulting hypervisor code (e.g. copy_from_user)? */
        if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
        {
            perfc_incr(copy_user_faults);
            regs->eip = fixup;
            return;
        }

        DEBUGGER_trap_fatal(TRAP_page_fault, regs);

        /* Unexpected fault in Xen itself: fatal. */
        show_execution_state(regs);
        show_page_walk(addr);
        panic("FATAL PAGE FAULT\n"
              "[error_code=%04x]\n"
              "Faulting linear address: %p\n",
              regs->error_code, _p(addr));
    }

    /* Otherwise it is the guest's problem: reflect the fault. */
    propagate_page_fault(addr, regs->error_code);
}

/*
 * Early #PF handler to print CR2, error code, and stack.
 *
 * We also deal with spurious faults here, even though they should never happen
 * during early boot (an issue was seen once, but was most likely a hardware
 * problem).
 */
asmlinkage void do_early_page_fault(struct cpu_user_regs *regs)
{
    static int stuck;
    static unsigned long prev_eip, prev_cr2;
    unsigned long cr2 = read_cr2();

    /* Early boot runs on the boot CPU only. */
    BUG_ON(smp_processor_id() != 0);

    /* A new fault location resets the "stuck" counter. */
    if ( (regs->eip != prev_eip) || (cr2 != prev_cr2) )
    {
        prev_eip = regs->eip;
        prev_cr2 = cr2;
        stuck = 0;
        return;
    }

    /* Same fault repeating: after 1000 occurrences, dump state and hang. */
    if ( stuck++ == 1000 )
    {
        unsigned long *stk = (unsigned long *)regs;
        printk("Early fatal page fault at %04x:%p (cr2=%p, ec=%04x)\n",
               regs->cs, _p(regs->eip), _p(cr2), regs->error_code);
        printk("Stack dump: ");
        /* Dump up to the end of the current stack page. */
        while ( ((long)stk & ((PAGE_SIZE - 1) & ~(BYTES_PER_LONG - 1))) != 0 )
            printk("%p ", _p(*stk++));
        for ( ; ; ) ;
    }
}

/*
 * Hypercall: set or clear CR0.TS on behalf of the guest, mirroring the
 * new value into the guest's virtual %cr0.
 */
long do_fpu_taskswitch(int set)
{
    struct vcpu *v = current;

    if ( set )
    {
        v->arch.guest_context.ctrlreg[0] |= X86_CR0_TS;
        stts();
    }
    else
    {
        v->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
        /* Only clear hardware TS if this vcpu owns live FPU state. */
        if ( v->fpu_dirtied )
            clts();
    }

    return 0;
}

/*
 * Read and decode segment descriptor @sel for vcpu @v into base/limit/ar.
 * In vm86 mode a real-mode-style descriptor is synthesized from the
 * selector value instead. @vm86attr supplies the attribute bits used in
 * that case (and _SEGMENT_CODE selects code-segment decoding).
 * Returns 1 on success, 0 if the descriptor could not be read.
 */
static int read_descriptor(unsigned int sel,
                           const struct vcpu *v,
                           const struct cpu_user_regs * regs,
                           unsigned long *base,
                           unsigned long *limit,
                           unsigned int *ar,
                           unsigned int vm86attr)
{
    struct desc_struct desc;

    if ( !vm86_mode(regs) )
    {
        /* Null selector: treat as an empty descriptor. */
        if ( sel < 4)
            desc.b = desc.a = 0;
        else if ( __get_user(desc,
                             (const struct desc_struct *)(!(sel & 4)
                                                          ? GDT_VIRT_START(v)
                                                          : LDT_VIRT_START(v))
                             + (sel >> 3)) )
            return 0;
        /* The L bit is only meaningful for code segments. */
        if ( !(vm86attr & _SEGMENT_CODE) )
            desc.b &= ~_SEGMENT_L;
    }
    else
    {
        /* vm86: base = sel << 4, limit = 64K, attributes from caller. */
        desc.a = (sel << 20) | 0xffff;
        desc.b = vm86attr | (sel >> 12);
    }

    *ar = desc.b & 0x00f0ff00;
    if ( !(desc.b & _SEGMENT_L) )
    {
        /* Legacy (non long-mode) segment: reassemble base and limit. */
        *base = ((desc.a >> 16) + ((desc.b & 0xff) << 16) +
                 (desc.b & 0xff000000));
        *limit = (desc.a & 0xffff) | (desc.b & 0x000f0000);
        if ( desc.b & _SEGMENT_G )
            *limit = ((*limit + 1) << 12) - 1;
#ifndef NDEBUG
        /* Cross-check our decode against the hardware LAR/LSL results. */
        if ( !vm86_mode(regs) && (sel > 3) )
        {
            unsigned int a, l;
            unsigned char valid;

            asm volatile (
                "larl %2,%0 ; setz %1"
                : "=r" (a), "=rm" (valid) : "rm" (sel));
            BUG_ON(valid && ((a & 0x00f0ff00) != *ar));
            asm volatile (
                "lsll %2,%0 ; setz %1"
                : "=r" (l), "=rm" (valid) : "rm" (sel));
            BUG_ON(valid && (l != *limit));
        }
#endif
    }
    else
    {
        /* Long-mode code segment: flat address space. */
        *base = 0UL;
        *limit = ~0UL;
    }

    return 1;
}

#ifdef __x86_64__

/*
 * NOTE(review): read_gate_descriptor() is truncated at the end of this
 * chunk (the validity check below continues past the visible text) — do
 * not edit in isolation.
 */
static int read_gate_descriptor(unsigned int gate_sel,
                                const struct vcpu *v,
                                unsigned int *sel,
                                unsigned long *off,
                                unsigned int *ar)
{
    struct desc_struct desc;
    const struct desc_struct *pdesc;

    pdesc = (const struct desc_struct *)
        (!(gate_sel & 4) ? GDT_VIRT_START(v) : LDT_VIRT_START(v))
        + (gate_sel >> 3);
    if ( (gate_sel < 4) ||
         ((gate_sel >= FIRST_RESERVED_GDT_BYTE) && !(gate_sel & 4)) ||
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -