📄 fault_64.c
字号:
/* * linux/arch/x86-64/mm/fault.c * * Copyright (C) 1995 Linus Torvalds * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. */#include <linux/signal.h>#include <linux/sched.h>#include <linux/kernel.h>#include <linux/errno.h>#include <linux/string.h>#include <linux/types.h>#include <linux/ptrace.h>#include <linux/mman.h>#include <linux/mm.h>#include <linux/smp.h>#include <linux/interrupt.h>#include <linux/init.h>#include <linux/tty.h>#include <linux/vt_kern.h> /* For unblank_screen() */#include <linux/compiler.h>#include <linux/vmalloc.h>#include <linux/module.h>#include <linux/kprobes.h>#include <linux/uaccess.h>#include <linux/kdebug.h>#include <linux/kprobes.h>#include <asm/system.h>#include <asm/pgalloc.h>#include <asm/smp.h>#include <asm/tlbflush.h>#include <asm/proto.h>#include <asm-generic/sections.h>/* Page fault error code bits */#define PF_PROT (1<<0) /* or no page found */#define PF_WRITE (1<<1)#define PF_USER (1<<2)#define PF_RSVD (1<<3)#define PF_INSTR (1<<4)#ifdef CONFIG_KPROBESstatic inline int notify_page_fault(struct pt_regs *regs){ int ret = 0; /* kprobe_running() needs smp_processor_id() */ if (!user_mode(regs)) { preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; preempt_enable(); } return ret;}#elsestatic inline int notify_page_fault(struct pt_regs *regs){ return 0;}#endif/* Sometimes the CPU reports invalid exceptions on prefetch. Check that here and ignore. Opcode checker based on code by Richard Brunner */static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned long error_code){ unsigned char *instr; int scan_more = 1; int prefetch = 0; unsigned char *max_instr; /* If it was a exec fault ignore */ if (error_code & PF_INSTR) return 0; instr = (unsigned char __user *)convert_rip_to_linear(current, regs); max_instr = instr + 15; if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) return 0; while (scan_more && instr < max_instr) { unsigned char opcode; unsigned char instr_hi; unsigned char instr_lo; if (probe_kernel_address(instr, opcode)) break; instr_hi = opcode & 0xf0; instr_lo = opcode & 0x0f; instr++; switch (instr_hi) { case 0x20: case 0x30: /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. In long mode, the CPU will signal invalid opcode if some of these prefixes are present so we will never get here anyway */ scan_more = ((instr_lo & 7) == 0x6); break; case 0x40: /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes Need to figure out under what instruction mode the instruction was issued ... */ /* Could check the LDT for lm, but for now it's good enough to assume that long mode only uses well known segments or kernel. */ scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS); break; case 0x60: /* 0x64 thru 0x67 are valid prefixes in all modes. */ scan_more = (instr_lo & 0xC) == 0x4; break; case 0xF0: /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */ scan_more = !instr_lo || (instr_lo>>1) == 1; break; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; if (probe_kernel_address(instr, opcode)) break; prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); break; default: scan_more = 0; break; } } return prefetch;}static int bad_address(void *p) { unsigned long dummy; return probe_kernel_address((unsigned long *)p, dummy);} void dump_pagetable(unsigned long address){ pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; pgd = (pgd_t *)read_cr3(); pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); pgd += pgd_index(address); if (bad_address(pgd)) goto bad; printk("PGD %lx ", pgd_val(*pgd)); if (!pgd_present(*pgd)) goto ret; pud = pud_offset(pgd, address); if (bad_address(pud)) goto bad; printk("PUD %lx ", pud_val(*pud)); if (!pud_present(*pud)) goto ret; pmd = pmd_offset(pud, address); if (bad_address(pmd)) goto bad; printk("PMD %lx ", pmd_val(*pmd)); if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; pte = pte_offset_kernel(pmd, address); if (bad_address(pte)) goto bad; printk("PTE %lx", pte_val(*pte)); ret: printk("\n"); return;bad: printk("BAD\n");}static const char errata93_warning[] = KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"KERN_ERR "******* Please consider a BIOS update.\n"KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";/* Workaround for K8 erratum #93 & buggy BIOS. BIOS SMM functions are required to use a specific workaround to avoid corruption of the 64bit RIP register on C stepping K8. A lot of BIOS that didn't get tested properly miss this. The OS sees this as a page fault with the upper 32bits of RIP cleared. Try to work around it here. Note we only handle faults in kernel here. */static int is_errata93(struct pt_regs *regs, unsigned long address) { static int warned; if (address != regs->rip) return 0; if ((address >> 32) != 0) return 0; address |= 0xffffffffUL << 32; if ((address >= (u64)_stext && address <= (u64)_etext) || (address >= MODULES_VADDR && address <= MODULES_END)) { if (!warned) { printk(errata93_warning); warned = 1; } regs->rip = address; return 1; } return 0;} static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, unsigned long error_code){ unsigned long flags = oops_begin(); struct task_struct *tsk; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", current->comm, address); dump_pagetable(address); tsk = current; tsk->thread.cr2 = address; tsk->thread.trap_no = 14; tsk->thread.error_code = error_code; __die("Bad pagetable", regs, error_code); oops_end(flags); do_exit(SIGKILL);}/* * Handle a fault on the vmalloc area * * This assumes no large pages in there. */static int vmalloc_fault(unsigned long address){ pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; pmd_t *pmd, *pmd_ref; pte_t *pte, *pte_ref; /* Copy kernel mappings over when needed. This can also happen within a race in page table update. In the later case just flush. */ pgd = pgd_offset(current->mm ?: &init_mm, address); pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); else BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); /* Below here mismatches are bugs because these lower tables are shared */ pud = pud_offset(pgd, address); pud_ref = pud_offset(pgd_ref, address); if (pud_none(*pud_ref)) return -1; if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) BUG(); pmd = pmd_offset(pud, address); pmd_ref = pmd_offset(pud_ref, address); if (pmd_none(*pmd_ref)) return -1; if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) BUG(); pte_ref = pte_offset_kernel(pmd_ref, address); if (!pte_present(*pte_ref)) return -1; pte = pte_offset_kernel(pmd, address); /* Don't use pte_page here, because the mappings can point outside mem_map, and the NUMA hash lookup cannot handle that. */ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) BUG(); return 0;}int show_unhandled_signals = 1;/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. */asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code){ struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; const struct exception_table_entry *fixup; int write, fault; unsigned long flags; siginfo_t info; /* * We can fault from pretty much anywhere, with unknown IRQ state. */ trace_hardirqs_fixup(); tsk = current;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -