fault_32.c
/*
 *  linux/arch/i386/mm/fault.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>		/* For unblank_screen() */
#include <linux/highmem.h>
#include <linux/bootmem.h>		/* for max_low_pfn */
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>

#include <asm/system.h>
#include <asm/desc.h>
#include <asm/segment.h>

extern void die(const char *,struct pt_regs *,long);

#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs)
{
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode_vm(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;
		preempt_enable();
	}

	return ret;
}
#else
static inline int notify_page_fault(struct pt_regs *regs)
{
	return 0;
}
#endif

/*
 * Return EIP plus the CS segment base.  The segment limit is also
 * adjusted, clamped to the kernel/user address space (whichever is
 * appropriate), and returned in *eip_limit.
 *
 * The segment is checked, because it might have been changed by another
 * task between the original faulting instruction and here.
 *
 * If CS is no longer a valid code segment, or if EIP is beyond the
 * limit, or if it is a kernel address when CS is not a kernel segment,
 * then the returned value will be greater than *eip_limit.
 *
 * This is slow, but is very rarely executed.
 */
static inline unsigned long get_segment_eip(struct pt_regs *regs,
					    unsigned long *eip_limit)
{
	unsigned long eip = regs->eip;
	unsigned seg = regs->xcs & 0xffff;
	u32 seg_ar, seg_limit, base, *desc;

	/* Unlikely, but must come before segment checks. */
	if (unlikely(regs->eflags & VM_MASK)) {
		base = seg << 4;
		*eip_limit = base + 0xffff;
		return base + (eip & 0xffff);
	}

	/* The standard kernel/user address space limit. */
	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;

	/* By far the most common cases. */
	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
		return eip;

	/* Check the segment exists, is within the current LDT/GDT size,
	   that kernel/user (ring 0..3) has the appropriate privilege,
	   that it's a code segment, and get the limit. */
	__asm__ ("larl %3,%0; lsll %3,%1"
		 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
	if ((~seg_ar & 0x9800) || eip > seg_limit) {
		*eip_limit = 0;
		return 1;	 /* So that returned eip > *eip_limit. */
	}

	/* Get the GDT/LDT descriptor base.
	   When you look for races in this code remember that
	   LDT and other horrors are only used in user space. */
	if (seg & (1<<2)) {
		/* Must lock the LDT while reading it. */
		mutex_lock(&current->mm->context.lock);
		desc = current->mm->context.ldt;
		desc = (void *)desc + (seg & ~7);
	} else {
		/* Must disable preemption while reading the GDT. */
		desc = (u32 *)get_cpu_gdt_table(get_cpu());
		desc = (void *)desc + (seg & ~7);
	}

	/* Decode the code segment base from the descriptor */
	base = get_desc_base((unsigned long *)desc);

	if (seg & (1<<2)) {
		mutex_unlock(&current->mm->context.lock);
	} else
		put_cpu();

	/* Adjust EIP and segment limit, and clamp at the kernel limit.
	   It's legitimate for segments to wrap at 0xffffffff. */
	seg_limit += base;
	if (seg_limit < *eip_limit && seg_limit >= base)
		*eip_limit = seg_limit;
	return eip + base;
}
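/*
 * [Illustrative sketch, not part of the original file.]
 *
 * get_desc_base() above extracts the segment base from the 8-byte
 * GDT/LDT descriptor that get_segment_eip() located.  On x86 the
 * 32-bit base is scattered across the two descriptor words: bits 15..0
 * sit in the high half of the first word, bits 23..16 in the low byte
 * of the second word, and bits 31..24 in its top byte.  A standalone
 * equivalent of that decode (hypothetical helper name) could look like:
 */
static inline unsigned long example_desc_base(const u32 *desc)
{
	return (desc[0] >> 16) |
	       ((desc[1] & 0x000000ffUL) << 16) |
	       (desc[1] & 0xff000000UL);
}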
/*
 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
 * Check that here and ignore it.
 */
static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
{
	unsigned long limit;
	unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
	int scan_more = 1;
	int prefetch = 0;
	int i;

	for (i = 0; scan_more && i < 15; i++) {
		unsigned char opcode;
		unsigned char instr_hi;
		unsigned char instr_lo;

		if (instr > (unsigned char *)limit)
			break;
		if (probe_kernel_address(instr, opcode))
			break;

		instr_hi = opcode & 0xf0;
		instr_lo = opcode & 0x0f;
		instr++;

		switch (instr_hi) {
		case 0x20:
		case 0x30:
			/* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */
			scan_more = ((instr_lo & 7) == 0x6);
			break;

		case 0x60:
			/* 0x64 thru 0x67 are valid prefixes in all modes. */
			scan_more = (instr_lo & 0xC) == 0x4;
			break;
		case 0xF0:
			/* 0xF0, 0xF2, and 0xF3 are valid prefixes */
			scan_more = !instr_lo || (instr_lo>>1) == 1;
			break;
		case 0x00:
			/* Prefetch instruction is 0x0F0D or 0x0F18 */
			scan_more = 0;
			if (instr > (unsigned char *)limit)
				break;
			if (probe_kernel_address(instr, opcode))
				break;
			prefetch = (instr_lo == 0xF) &&
				(opcode == 0x0D || opcode == 0x18);
			break;
		default:
			scan_more = 0;
			break;
		}
	}
	return prefetch;
}

static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
			      unsigned long error_code)
{
	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		     boot_cpu_data.x86 >= 6)) {
		/* Catch an obscure case of prefetch inside an NX page. */
		if (nx_enabled && (error_code & 16))
			return 0;
		return __is_prefetch(regs, addr);
	}
	return 0;
}
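/*
 * [Illustrative sketch, not part of the original file.]
 *
 * The scan loop in __is_prefetch() skips legal one-byte prefixes
 * (segment overrides 0x26/0x2E/0x36/0x3E/0x64/0x65, the operand- and
 * address-size prefixes 0x66/0x67, and LOCK/REPNE/REP 0xF0/0xF2/0xF3)
 * and then accepts only the two-byte prefetch encodings 0x0F 0x0D
 * (3DNow! PREFETCH/PREFETCHW) and 0x0F 0x18 (SSE PREFETCHh).  Applied
 * to a plain byte buffer, the core test reduces to the following
 * hypothetical helper:
 */
static inline int example_is_prefetch_opcode(const unsigned char *insn)
{
	return insn[0] == 0x0F && (insn[1] == 0x0D || insn[1] == 0x18);
}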
static noinline void force_sig_info_fault(int si_signo, int si_code,
	unsigned long address, struct task_struct *tsk)
{
	siginfo_t info;

	info.si_signo = si_signo;
	info.si_errno = 0;
	info.si_code = si_code;
	info.si_addr = (void __user *)address;
	force_sig_info(si_signo, &info, tsk);
}

fastcall void do_invalid_op(struct pt_regs *, unsigned long);

static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	/*
	 * set_pgd(pgd, *pgd_k); here would be useless on PAE
	 * and redundant with the set_pmd() on non-PAE. As would
	 * set_pud.
	 */

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;
	if (!pmd_present(*pmd)) {
		set_pmd(pmd, *pmd_k);
		arch_flush_lazy_mmu_mode();
	} else
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
	return pmd_k;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 *
 * This assumes no large pages in there.
 */
static inline int vmalloc_fault(unsigned long address)
{
	unsigned long pgd_paddr;
	pmd_t *pmd_k;
	pte_t *pte_k;

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_paddr = read_cr3();
	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
	if (!pmd_k)
		return -1;
	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;
	return 0;
}

int show_unhandled_signals = 1;

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * error_code:
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 *	bit 3 == 1 means use of reserved bit detected
 *	bit 4 == 1 means fault was an instruction fetch
 */
fastcall void __kprobes do_page_fault(struct pt_regs *regs,
				      unsigned long error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct * vma;
	unsigned long address;
	int write, si_code;
	int fault;

	/*
	 * We can fault from pretty much anywhere, with unknown IRQ state.
	 */
	trace_hardirqs_fixup();

	/* get the address */
	address = read_cr2();

	tsk = current;

	si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * (error_code & 4) == 0, and that the fault was not a
	 * protection error (error_code & 9) == 0.
	 */
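/*
 * [Illustrative sketch, not part of the original file; the listing
 * above is truncated before the rest of do_page_fault().]
 *
 * The hardware error_code documented above is a small bit field, so
 * the checks named in the last comment decode as follows:
 * (error_code & 4) == 0 means the fault came from kernel mode, and
 * (error_code & 9) == 0 means it was neither a protection fault nor a
 * reserved-bit fault, i.e. a plain not-present fault, which is the
 * only kind the lock-free vmalloc path may handle.  A hypothetical
 * helper expressing that test:
 */
static inline int example_fault_is_kernel_not_present(unsigned long error_code)
{
	return (error_code & 4) == 0 &&		/* CPU was in kernel mode */
	       (error_code & 9) == 0;		/* page not present, no reserved bit set */
}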