fault.c

From the Linux 2.6.17.4 source tree · C code · 627 lines total · page 1/2

/*
 *  linux/arch/i386/mm/fault.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>		/* For unblank_screen() */
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/kprobes.h>

#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/kdebug.h>

extern void die(const char *,struct pt_regs *,long);

/*
 * Unlock any spinlocks which will prevent us from getting the
 * message out
 */
void bust_spinlocks(int yes)
{
	int loglevel_save = console_loglevel;

	if (yes) {
		oops_in_progress = 1;
		return;
	}
#ifdef CONFIG_VT
	unblank_screen();
#endif
	oops_in_progress = 0;
	/*
	 * OK, the message is on the console.  Now we call printk()
	 * without oops_in_progress set so that printk will give klogd
	 * a poke.  Hold onto your hats...
	 */
	console_loglevel = 15;		/* NMI oopser may have shut the console up */
	printk(" ");
	console_loglevel = loglevel_save;
}

/*
 * Return EIP plus the CS segment base.  The segment limit is also
 * adjusted, clamped to the kernel/user address space (whichever is
 * appropriate), and returned in *eip_limit.
 *
 * The segment is checked, because it might have been changed by another
 * task between the original faulting instruction and here.
 *
 * If CS is no longer a valid code segment, or if EIP is beyond the
 * limit, or if it is a kernel address when CS is not a kernel segment,
 * then the returned value will be greater than *eip_limit.
 *
 * This is slow, but is very rarely executed.
 */
static inline unsigned long get_segment_eip(struct pt_regs *regs,
					    unsigned long *eip_limit)
{
	unsigned long eip = regs->eip;
	unsigned seg = regs->xcs & 0xffff;
	u32 seg_ar, seg_limit, base, *desc;

	/* The standard kernel/user address space limit. */
	*eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;

	/* Unlikely, but must come before segment checks. */
	if (unlikely((regs->eflags & VM_MASK) != 0))
		return eip + (seg << 4);

	/* By far the most common cases. */
	if (likely(seg == __USER_CS || seg == __KERNEL_CS))
		return eip;

	/* Check the segment exists, is within the current LDT/GDT size,
	   that kernel/user (ring 0..3) has the appropriate privilege,
	   that it's a code segment, and get the limit. */
	__asm__ ("larl %3,%0; lsll %3,%1"
		 : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
	if ((~seg_ar & 0x9800) || eip > seg_limit) {
		*eip_limit = 0;
		return 1;	 /* So that returned eip > *eip_limit. */
	}

	/* Get the GDT/LDT descriptor base.
	   When you look for races in this code remember that
	   LDT and other horrors are only used in user space. */
	if (seg & (1<<2)) {
		/* Must lock the LDT while reading it. */
		down(&current->mm->context.sem);
		desc = current->mm->context.ldt;
		desc = (void *)desc + (seg & ~7);
	} else {
		/* Must disable preemption while reading the GDT. */
		desc = (u32 *)get_cpu_gdt_table(get_cpu());
		desc = (void *)desc + (seg & ~7);
	}

	/* Decode the code segment base from the descriptor */
	base = get_desc_base((unsigned long *)desc);

	if (seg & (1<<2)) {
		up(&current->mm->context.sem);
	} else
		put_cpu();

	/* Adjust EIP and segment limit, and clamp at the kernel limit.
	   It's legitimate for segments to wrap at 0xffffffff. */
	seg_limit += base;
	if (seg_limit < *eip_limit && seg_limit >= base)
		*eip_limit = seg_limit;
	return eip + base;
}

/*
 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
 * Check that here and ignore it.
 */
static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
{
	unsigned long limit;
	unsigned long instr = get_segment_eip (regs, &limit);
	int scan_more = 1;
	int prefetch = 0;
	int i;

	for (i = 0; scan_more && i < 15; i++) {
		unsigned char opcode;
		unsigned char instr_hi;
		unsigned char instr_lo;

		if (instr > limit)
			break;
		if (__get_user(opcode, (unsigned char __user *) instr))
			break;

		instr_hi = opcode & 0xf0;
		instr_lo = opcode & 0x0f;
		instr++;

		switch (instr_hi) {
		case 0x20:
		case 0x30:
			/* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */
			scan_more = ((instr_lo & 7) == 0x6);
			break;

		case 0x60:
			/* 0x64 thru 0x67 are valid prefixes in all modes. */
			scan_more = (instr_lo & 0xC) == 0x4;
			break;

		case 0xF0:
			/* 0xF0, 0xF2, and 0xF3 are valid prefixes */
			scan_more = !instr_lo || (instr_lo>>1) == 1;
			break;

		case 0x00:
			/* Prefetch instruction is 0x0F0D or 0x0F18 */
			scan_more = 0;
			if (instr > limit)
				break;
			if (__get_user(opcode, (unsigned char __user *) instr))
				break;
			prefetch = (instr_lo == 0xF) &&
				(opcode == 0x0D || opcode == 0x18);
			break;

		default:
			scan_more = 0;
			break;
		}
	}
	return prefetch;
}

static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
			      unsigned long error_code)
{
	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		     boot_cpu_data.x86 >= 6)) {
		/* Catch an obscure case of prefetch inside an NX page. */
		if (nx_enabled && (error_code & 16))
			return 0;
		return __is_prefetch(regs, addr);
	}
	return 0;
}

static noinline void force_sig_info_fault(int si_signo, int si_code,
	unsigned long address, struct task_struct *tsk)
{
	siginfo_t info;

	info.si_signo = si_signo;
	info.si_errno = 0;
	info.si_code = si_code;
	info.si_addr = (void __user *)address;
	force_sig_info(si_signo, &info, tsk);
}

fastcall void do_invalid_op(struct pt_regs *, unsigned long);

static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	/*
	 * set_pgd(pgd, *pgd_k); here would be useless on PAE
	 * and redundant with the set_pmd() on non-PAE. As would
	 * set_pud.
	 */

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;
	if (!pmd_present(*pmd))
		set_pmd(pmd, *pmd_k);
	else
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
	return pmd_k;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 *
 * This assumes no large pages in there.
 */
static inline int vmalloc_fault(unsigned long address)
{
	unsigned long pgd_paddr;
	pmd_t *pmd_k;
	pte_t *pte_k;
	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_paddr = read_cr3();
	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
	if (!pmd_k)
		return -1;
	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;
	return 0;
}

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * error_code:
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 *	bit 3 == 1 means use of reserved bit detected
 *	bit 4 == 1 means fault was an instruction fetch
 */
fastcall void __kprobes do_page_fault(struct pt_regs *regs,
				      unsigned long error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct * vma;
	unsigned long address;
	unsigned long page;
	int write, si_code;

	/* get the address */
	address = read_cr2();

	tsk = current;

	si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
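
A note on the error_code bits documented above do_page_fault (the listing continues on page 2): the same layout is what is_prefetch tests with (error_code & 16), i.e. bit 4, the instruction-fetch/NX bit. The sketch below is not part of fault.c; describe_error_code is a made-up helper that simply prints the meaning of each bit as listed in that comment, and it compiles standalone in user space:

#include <stdio.h>

/* Decode the i386 page-fault error_code bits per the fault.c comment. */
static void describe_error_code(unsigned long error_code)
{
	printf("%s fault, %s access, in %s mode%s%s\n",
	       (error_code & 1) ? "protection" : "not-present",
	       (error_code & 2) ? "write" : "read",
	       (error_code & 4) ? "user" : "kernel",
	       (error_code & 8) ? ", reserved bit set" : "",
	       (error_code & 16) ? ", instruction fetch" : "");
}

int main(void)
{
	describe_error_code(6);	/* bits 1+2: user-mode write to a missing page */
	return 0;
}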
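Similarly, the selector arithmetic in get_segment_eip (seg & 3, seg & (1<<2), seg & ~7) follows the x86 segment-selector format: bits 0-1 are the requested privilege level, bit 2 is the table indicator (0 = GDT, 1 = LDT), and the remaining bits index 8-byte descriptors, so masking off the low three bits yields the byte offset into the descriptor table. A standalone sketch, again not part of fault.c, using 0x73 as input (what __USER_CS expands to on i386 in this kernel: GDT entry 14, RPL 3):

#include <stdio.h>

int main(void)
{
	unsigned seg = 0x73;	/* __USER_CS: GDT entry 14, RPL 3 */

	printf("RPL         = %u\n", seg & 3);	/* ring of the faulting code */
	printf("table       = %s\n", (seg & (1 << 2)) ? "LDT" : "GDT");
	printf("byte offset = 0x%x\n", seg & ~7);	/* offset added to 'desc' in fault.c */
	return 0;
}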
