fault.c

Source: Linux 2.6.17.4 kernel source tree (arch/i386/mm/fault.c) · C code · 627 lines total · page 1 of 2

Language: C
Lines: 627
Font size
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * (error_code & 4) == 0, and that the fault was not a
	 * protection error (error_code & 9) == 0.
	 *
	 * NOTE(review): this span begins mid-function; the page-fault
	 * handler's signature and the declarations of tsk, mm, vma,
	 * address, write and si_code appear earlier in the file.
	 * error_code follows the x86 #PF convention: bit 0 = protection
	 * violation, bit 1 = write, bit 2 = user mode, bit 3 =
	 * reserved-bit, bit 4 = instruction fetch.
	 */
	if (unlikely(address >= TASK_SIZE)) {
		/*
		 * Kernel-address fault: try the lazy vmalloc pgd fixup,
		 * but only when bits 0, 2 and 3 (0x0d) are all clear,
		 * i.e. a plain not-present kernel access.
		 */
		if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
			return;
		if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
						SIGSEGV) == NOTIFY_STOP)
			return;
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		goto bad_area_nosemaphore;
	}

	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
					SIGSEGV) == NOTIFY_STOP)
		return;

	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
	   fault has been handled. */
	if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
		local_irq_enable();

	mm = tsk->mm;

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		/* Kernel-mode fault with no exception-table fixup for this
		 * eip cannot validly touch user space: bail out now rather
		 * than risk deadlocking on mmap_sem. */
		if ((error_code & 4) == 0 &&
		    !search_exception_tables(regs->eip))
			goto bad_area_nosemaphore;
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (error_code & 4) {
		/*
		 * accessing the stack below %esp is always a bug.
		 * The "+ 32" is there due to some instructions (like
		 * pusha) doing post-decrement on the stack and that
		 * doesn't show up until later..
		 */
		if (address + 32 < regs->esp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
	write = 0;
	/* Decode access type from error_code bits 0-1 and check it
	 * against the vma's permission flags. */
	switch (error_code & 3) {
		default:	/* 3: write, present */
#ifdef TEST_VERIFY_AREA
			if (regs->cs == KERNEL_CS)
				printk("WP fault at %08lx\n", regs->eip);
#endif
			/* fall through */
		case 2:		/* write, not present */
			if (!(vma->vm_flags & VM_WRITE))
				goto bad_area;
			write++;
			break;
		case 1:		/* read, present */
			goto bad_area;
		case 0:		/* read, not present */
			if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
				goto bad_area;
	}

 survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	switch (handle_mm_fault(mm, vma, address, write)) {
		case VM_FAULT_MINOR:
			tsk->min_flt++;
			break;
		case VM_FAULT_MAJOR:
			tsk->maj_flt++;
			break;
		case VM_FAULT_SIGBUS:
			goto do_sigbus;
		case VM_FAULT_OOM:
			goto out_of_memory;
		default:
			BUG();
	}

	/*
	 * Did it hit the DOS screen memory VA from vm86 mode?
	 */
	if (regs->eflags & VM_MASK) {
		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
		if (bit < 32)
			tsk->thread.screen_bitmap |= 1 << bit;
	}
	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (error_code & 4) {
		/*
		 * Valid to do another page fault here because this one came
		 * from user space.
		 */
		if (is_prefetch(regs, address, error_code))
			return;

		tsk->thread.cr2 = address;
		/* Kernel addresses are always protection faults */
		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
		tsk->thread.trap_no = 14;
		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
		return;
	}

#ifdef CONFIG_X86_F00F_BUG
	/*
	 * Pentium F0 0F C7 C8 bug workaround: a fault on IDT slot 6
	 * (invalid opcode) is redirected to do_invalid_op().
	 */
	if (boot_cpu_data.f00f_bug) {
		unsigned long nr;

		nr = (address - idt_descr.address) >> 3;

		if (nr == 6) {
			do_invalid_op(regs, 0);
			return;
		}
	}
#endif

no_context:
	/* Are we prepared to handle this kernel fault?  */
	if (fixup_exception(regs))
		return;

	/*
	 * Valid to do another page fault here, because if this fault
	 * had been triggered by is_prefetch fixup_exception would have
	 * handled it.
	 */
	if (is_prefetch(regs, address, error_code))
		return;

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */
	bust_spinlocks(1);

	if (oops_may_print()) {
	#ifdef CONFIG_X86_PAE
		/* bit 4 set: fault was an instruction fetch (NX). */
		if (error_code & 16) {
			pte_t *pte = lookup_address(address);

			if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
				printk(KERN_CRIT "kernel tried to execute "
					"NX-protected page - exploit attempt? "
					"(uid: %d)\n", current->uid);
		}
	#endif
		if (address < PAGE_SIZE)
			printk(KERN_ALERT "BUG: unable to handle kernel NULL "
					"pointer dereference");
		else
			printk(KERN_ALERT "BUG: unable to handle kernel paging"
					" request");
		printk(" at virtual address %08lx\n",address);
		printk(KERN_ALERT " printing eip:\n");
		printk("%08lx\n", regs->eip);
	}

	/* Dump the faulting page-directory entry; with two-level paging
	 * bits 31..22 of the address index the pgd. */
	page = read_cr3();
	page = ((unsigned long *) __va(page))[address >> 22];
	if (oops_may_print())
		printk(KERN_ALERT "*pde = %08lx\n", page);
	/*
	 * We must not directly access the pte in the highpte
	 * case, the page table might be allocated in highmem.
	 * And lets rather not kmap-atomic the pte, just in case
	 * it's allocated already.
	 */
#ifndef CONFIG_HIGHPTE
	if ((page & 1) && oops_may_print()) {
		page &= PAGE_MASK;
		address &= 0x003ff000;
		page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
		printk(KERN_ALERT "*pte = %08lx\n", page);
	}
#endif
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	die("Oops", regs, error_code);
	bust_spinlocks(0);
	do_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	/* Never OOM-kill init (pid 1): yield the CPU and retry the fault. */
	if (tsk->pid == 1) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (error_code & 4)
		do_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!(error_code & 4))
		goto no_context;

	/* User space => ok to do another page fault */
	if (is_prefetch(regs, address, error_code))
		return;

	tsk->thread.cr2 = address;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 14;
	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}

#ifndef CONFIG_X86_PAE
/*
 * Walk every pgd in the system (pgd_list) and copy in any kernel
 * (vmalloc-area) pgd entries they are missing, so that later accesses
 * need no vmalloc_fault() fixup.
 */
void vmalloc_sync_all(void)
{
	/*
	 * Note that races in the updates of insync and start aren't
	 * problematic: insync can only get set bits added, and updates to
	 * start are only improving performance (without affecting correctness
	 * if undone).
	 */
	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
	static unsigned long start = TASK_SIZE;
	unsigned long address;

	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);

	/* The loop terminates when 'address' wraps around past the top
	 * of the 32-bit address space, dropping below TASK_SIZE. */
	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
		if (!test_bit(pgd_index(address), insync)) {
			unsigned long flags;
			struct page *page;

			spin_lock_irqsave(&pgd_lock, flags);
			for (page = pgd_list; page; page =
					(struct page *)page->index)
				if (!vmalloc_sync_one(page_address(page),
								address)) {
					/* Sync can only fail on the very
					 * first pgd; all pgds match. */
					BUG_ON(page != pgd_list);
					break;
				}
			spin_unlock_irqrestore(&pgd_lock, flags);
			/* Only mark this pgd slot synced if no pgd failed. */
			if (!page)
				set_bit(pgd_index(address), insync);
		}
		if (address == start && test_bit(pgd_index(address), insync))
			start = address + PGDIR_SIZE;
	}
}
#endif

⌨️ Keyboard shortcuts

Copy code: Ctrl + C
Search code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?