fault.c
来自「LINUX 2.6.17.4的源码」· C语言 代码 · 共 627 行 · 第 1/2 页
C
627 行
/* * linux/arch/i386/mm/fault.c * * Copyright (C) 1995 Linus Torvalds */#include <linux/signal.h>#include <linux/sched.h>#include <linux/kernel.h>#include <linux/errno.h>#include <linux/string.h>#include <linux/types.h>#include <linux/ptrace.h>#include <linux/mman.h>#include <linux/mm.h>#include <linux/smp.h>#include <linux/smp_lock.h>#include <linux/interrupt.h>#include <linux/init.h>#include <linux/tty.h>#include <linux/vt_kern.h> /* For unblank_screen() */#include <linux/highmem.h>#include <linux/module.h>#include <linux/kprobes.h>#include <asm/system.h>#include <asm/uaccess.h>#include <asm/desc.h>#include <asm/kdebug.h>extern void die(const char *,struct pt_regs *,long);/* * Unlock any spinlocks which will prevent us from getting the * message out */void bust_spinlocks(int yes){ int loglevel_save = console_loglevel; if (yes) { oops_in_progress = 1; return; }#ifdef CONFIG_VT unblank_screen();#endif oops_in_progress = 0; /* * OK, the message is on the console. Now we call printk() * without oops_in_progress set so that printk will give klogd * a poke. Hold onto your hats... */ console_loglevel = 15; /* NMI oopser may have shut the console up */ printk(" "); console_loglevel = loglevel_save;}/* * Return EIP plus the CS segment base. The segment limit is also * adjusted, clamped to the kernel/user address space (whichever is * appropriate), and returned in *eip_limit. * * The segment is checked, because it might have been changed by another * task between the original faulting instruction and here. * * If CS is no longer a valid code segment, or if EIP is beyond the * limit, or if it is a kernel address when CS is not a kernel segment, * then the returned value will be greater than *eip_limit. * * This is slow, but is very rarely executed. */static inline unsigned long get_segment_eip(struct pt_regs *regs, unsigned long *eip_limit){ unsigned long eip = regs->eip; unsigned seg = regs->xcs & 0xffff; u32 seg_ar, seg_limit, base, *desc; /* The standard kernel/user address space limit. */ *eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg; /* Unlikely, but must come before segment checks. */ if (unlikely((regs->eflags & VM_MASK) != 0)) return eip + (seg << 4); /* By far the most common cases. */ if (likely(seg == __USER_CS || seg == __KERNEL_CS)) return eip; /* Check the segment exists, is within the current LDT/GDT size, that kernel/user (ring 0..3) has the appropriate privilege, that it's a code segment, and get the limit. */ __asm__ ("larl %3,%0; lsll %3,%1" : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg)); if ((~seg_ar & 0x9800) || eip > seg_limit) { *eip_limit = 0; return 1; /* So that returned eip > *eip_limit. */ } /* Get the GDT/LDT descriptor base. When you look for races in this code remember that LDT and other horrors are only used in user space. */ if (seg & (1<<2)) { /* Must lock the LDT while reading it. */ down(¤t->mm->context.sem); desc = current->mm->context.ldt; desc = (void *)desc + (seg & ~7); } else { /* Must disable preemption while reading the GDT. */ desc = (u32 *)get_cpu_gdt_table(get_cpu()); desc = (void *)desc + (seg & ~7); } /* Decode the code segment base from the descriptor */ base = get_desc_base((unsigned long *)desc); if (seg & (1<<2)) { up(¤t->mm->context.sem); } else put_cpu(); /* Adjust EIP and segment limit, and clamp at the kernel limit. It's legitimate for segments to wrap at 0xffffffff. */ seg_limit += base; if (seg_limit < *eip_limit && seg_limit >= base) *eip_limit = seg_limit; return eip + base;}/* * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. * Check that here and ignore it. */static int __is_prefetch(struct pt_regs *regs, unsigned long addr){ unsigned long limit; unsigned long instr = get_segment_eip (regs, &limit); int scan_more = 1; int prefetch = 0; int i; for (i = 0; scan_more && i < 15; i++) { unsigned char opcode; unsigned char instr_hi; unsigned char instr_lo; if (instr > limit) break; if (__get_user(opcode, (unsigned char __user *) instr)) break; instr_hi = opcode & 0xf0; instr_lo = opcode & 0x0f; instr++; switch (instr_hi) { case 0x20: case 0x30: /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */ scan_more = ((instr_lo & 7) == 0x6); break; case 0x60: /* 0x64 thru 0x67 are valid prefixes in all modes. */ scan_more = (instr_lo & 0xC) == 0x4; break; case 0xF0: /* 0xF0, 0xF2, and 0xF3 are valid prefixes */ scan_more = !instr_lo || (instr_lo>>1) == 1; break; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; if (instr > limit) break; if (__get_user(opcode, (unsigned char __user *) instr)) break; prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); break; default: scan_more = 0; break; } } return prefetch;}static inline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned long error_code){ if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 >= 6)) { /* Catch an obscure case of prefetch inside an NX page. */ if (nx_enabled && (error_code & 16)) return 0; return __is_prefetch(regs, addr); } return 0;} static noinline void force_sig_info_fault(int si_signo, int si_code, unsigned long address, struct task_struct *tsk){ siginfo_t info; info.si_signo = si_signo; info.si_errno = 0; info.si_code = si_code; info.si_addr = (void __user *)address; force_sig_info(si_signo, &info, tsk);}fastcall void do_invalid_op(struct pt_regs *, unsigned long);static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address){ unsigned index = pgd_index(address); pgd_t *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pgd += index; pgd_k = init_mm.pgd + index; if (!pgd_present(*pgd_k)) return NULL; /* * set_pgd(pgd, *pgd_k); here would be useless on PAE * and redundant with the set_pmd() on non-PAE. As would * set_pud. */ pud = pud_offset(pgd, address); pud_k = pud_offset(pgd_k, address); if (!pud_present(*pud_k)) return NULL; pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) return NULL; if (!pmd_present(*pmd)) set_pmd(pmd, *pmd_k); else BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); return pmd_k;}/* * Handle a fault on the vmalloc or module mapping area * * This assumes no large pages in there. */static inline int vmalloc_fault(unsigned long address){ unsigned long pgd_paddr; pmd_t *pmd_k; pte_t *pte_k; /* * Synchronize this task's top level page-table * with the 'reference' page table. * * Do _not_ use "current" here. We might be inside * an interrupt in the middle of a task switch.. */ pgd_paddr = read_cr3(); pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); if (!pmd_k) return -1; pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; return 0;}/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * * error_code: * bit 0 == 0 means no page found, 1 means protection fault * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode * bit 3 == 1 means use of reserved bit detected * bit 4 == 1 means fault was an instruction fetch */fastcall void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code){ struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; unsigned long page; int write, si_code; /* get the address */ address = read_cr2(); tsk = current; si_code = SEGV_MAPERR; /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table,
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?