uvm_fault.c

来自「基于组件方式开发操作系统的OSKIT源代码」· C语言 代码 · 共 1,896 行 · 第 1/4 页

C
1,896
字号
/*	$NetBSD: uvm_fault.c,v 1.52 2000/11/27 08:40:03 chs Exp $	*//* * * Copyright (c) 1997 Charles D. Cranor and Washington University. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in the *    documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software *    must display the following acknowledgement: *      This product includes software developed by Charles D. Cranor and *      Washington University. * 4. The name of the author may not be used to endorse or promote products *    derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp */#include "opt_uvmhist.h"/* * uvm_fault.c: fault handler */#include <sys/param.h>#include <sys/systm.h>#include <sys/kernel.h>#include <sys/proc.h>#include <sys/malloc.h>#include <sys/mman.h>#include <sys/user.h>#include <uvm/uvm.h>/* * * a word on page faults: * * types of page faults we handle: * * CASE 1: upper layer faults                   CASE 2: lower layer faults * *    CASE 1A         CASE 1B                  CASE 2A        CASE 2B *    read/write1     write>1                  read/write   +-cow_write/zero *         |             |                         |        |         *      +--|--+       +--|--+     +-----+       +  |  +     | +-----+ * amap |  V  |       |  ----------->new|          |        | |  ^  | *      +-----+       +-----+     +-----+       +  |  +     | +--|--+ *                                                 |        |    | *      +-----+       +-----+                   +--|--+     | +--|--+ * uobj | d/c |       | d/c |                   |  V  |     +----|  | *      +-----+       +-----+                   +-----+       +-----+ * * d/c = don't care *  *   case [0]: layerless fault *	no amap or uobj is present.   this is an error. * *   case [1]: upper layer fault [anon active] *     1A: [read] or [write with anon->an_ref == 1] *		I/O takes place in top level anon and uobj is not touched. *     1B: [write with anon->an_ref > 1] *		new anon is alloc'd and data is copied off ["COW"] * *   case [2]: lower layer fault [uobj] *     2A: [read on non-NULL uobj] or [write to non-copy_on_write area] *		I/O takes place directly in object. *     2B: [write to copy_on_write] or [read on NULL uobj] *		data is "promoted" from uobj to a new anon.    *		if uobj is null, then we zero fill. 
* * we follow the standard UVM locking protocol ordering: * * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ)  * we hold a PG_BUSY page if we unlock for I/O * * * the code is structured as follows: *   *     - init the "IN" params in the ufi structure *   ReFault: *     - do lookups [locks maps], check protection, handle needs_copy *     - check for case 0 fault (error) *     - establish "range" of fault *     - if we have an amap lock it and extract the anons *     - if sequential advice deactivate pages behind us *     - at the same time check pmap for unmapped areas and anon for pages *	 that we could map in (and do map it if found) *     - check object for resident pages that we could map in *     - if (case 2) goto Case2 *     - >>> handle case 1 *           - ensure source anon is resident in RAM *           - if case 1B alloc new anon and copy from source *           - map the correct page in *   Case2: *     - >>> handle case 2 *           - ensure source page is resident (if uobj) *           - if case 2B alloc new anon and copy from source (could be zero *		fill if uobj == NULL) *           - map the correct page in *     - done! * * note on paging: *   if we have to do I/O we place a PG_BUSY page in the correct object, * unlock everything, and do the I/O.   when I/O is done we must reverify * the state of the world before assuming that our data structures are * valid.   [because mappings could change while the map is unlocked] * *  alternative 1: unbusy the page in question and restart the page fault *    from the top (ReFault).   this is easy but does not take advantage *    of the information that we already have from our previous lookup,  *    although it is possible that the "hints" in the vm_map will help here. * * alternative 2: the system already keeps track of a "version" number of *    a map.   [i.e. every time you write-lock a map (e.g. to change a *    mapping) you bump the version number up by one...]   
so, we can save
 *    the version number of the map before we release the lock and start I/O.
 *    then when I/O is done we can relock and check the version numbers
 *    to see if anything changed.    this might save us some over 1 because
 *    we don't have to unbusy the page and may be less compares(?).
 *
 * alternative 3: put in backpointers or a way to "hold" part of a map
 *    in place while I/O is in progress.   this could be complex to
 *    implement (especially with structures like amap that can be referenced
 *    by multiple map entries, and figuring out what should wait could be
 *    complex as well...).
 *
 * given that we are not currently multiprocessor or multithreaded we might
 * as well choose alternative 2 now.   maybe alternative 3 would be useful
 * in the future.    XXX keep in mind for future consideration//rechecking.
 */

/*
 * local data structures
 */

/*
 * uvm_advice: per-advice clustering parameters used when establishing
 * the "range" of a fault (how many neighboring pages to bring in around
 * the faulting address).  indexed by the MADV_* advice value, see the
 * uvmadvice[] table below.
 */
struct uvm_advice {
	int advice;	/* MADV_* value this entry describes (== array index) */
	int nback;	/* pages of read-behind (before the faulting page) */
	int nforw;	/* pages of read-ahead (after the faulting page) */
};

/*
 * page range array:
 * note: index in array must match "advice" value
 * XXX: borrowed numbers from freebsd.   do they work well for us?
 */

static struct uvm_advice uvmadvice[] = {
	{ MADV_NORMAL, 3, 4 },
	{ MADV_RANDOM, 0, 0 },
	{ MADV_SEQUENTIAL, 8, 7},
};

#define UVM_MAXRANGE 16	/* must be max() of nback+nforw+1 */

/*
 * private prototypes
 */

static void uvmfault_amapcopy __P((struct uvm_faultinfo *));
static __inline void uvmfault_anonflush __P((struct vm_anon **, int));

/*
 * inline functions
 */

/*
 * uvmfault_anonflush: try and deactivate pages in specified anons
 *
 * => caller passes an array of anon pointers; NULL slots are skipped
 * => does not have to deactivate page if it is busy
 */

static __inline void
uvmfault_anonflush(anons, n)
	struct vm_anon **anons;
	int n;
{
	int lcv;
	struct vm_page *pg;

	for (lcv = 0 ; lcv < n ; lcv++) {
		if (anons[lcv] == NULL)
			continue;
		simple_lock(&anons[lcv]->an_lock);
		pg = anons[lcv]->u.an_page;
		/* skip busy pages and pages on loan; they are not ours to move */
		if (pg && (pg->flags & PG_BUSY) == 0 && pg->loan_count == 0) {
			uvm_lock_pageq();
			if (pg->wire_count == 0) {
				/* only pageable pages: drop mappings, then
				   push the page onto the inactive queue */
				pmap_page_protect(pg, VM_PROT_NONE);
				uvm_pagedeactivate(pg);
			}
			uvm_unlock_pageq();
		}
		simple_unlock(&anons[lcv]->an_lock);
	}
}

/*
 * normal functions
 */

/*
 * uvmfault_amapcopy: clear "needs_copy" in a map.
 *
 * => called with VM data structures unlocked (usually, see below)
 * => we get a write lock on the maps and clear needs_copy for a VA
 * => if we are out of RAM we sleep (waiting for more)
 */

static void
uvmfault_amapcopy(ufi)
	struct uvm_faultinfo *ufi;
{

	/*
	 * while we haven't done the job
	 */

	while (1) {

		/*
		 * no mapping?  give up.
		 */

		if (uvmfault_lookup(ufi, TRUE) == FALSE)
			return;

		/*
		 * copy if needed.  (nonblocking allocation: M_NOWAIT,
		 * covers just the single faulting page)
		 */

		if (UVM_ET_ISNEEDSCOPY(ufi->entry))
			amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE,
				ufi->orig_rvaddr, ufi->orig_rvaddr + 1);

		/*
		 * didn't work?  must be out of RAM.   unlock and sleep.
		 */

		if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
			uvmfault_unlockmaps(ufi, TRUE);
			uvm_wait("fltamapcopy");
			continue;
		}

		/*
		 * got it!   unlock and return.
		 */

		uvmfault_unlockmaps(ufi, TRUE);
		return;
	}
	/*NOTREACHED*/
}

/*
 * uvmfault_anonget: get data in an anon into a non-busy, non-released
 * page in that anon.
* * => maps, amap, and anon locked by caller. * => if we fail (result != VM_PAGER_OK) we unlock everything. * => if we are successful, we return with everything still locked. * => we don't move the page on the queues [gets moved later] * => if we allocate a new page [we_own], it gets put on the queues. *    either way, the result is that the page is on the queues at return time * => for pages which are on loan from a uvm_object (and thus are not *    owned by the anon): if successful, we return with the owning object *    locked.   the caller must unlock this object when it unlocks everything *    else. */intuvmfault_anonget(ufi, amap, anon)	struct uvm_faultinfo *ufi;	struct vm_amap *amap;	struct vm_anon *anon;{	boolean_t we_own;	/* we own anon's page? */	boolean_t locked;	/* did we relock? */	struct vm_page *pg;	int result;	UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist);	result = 0;		/* XXX shut up gcc */	uvmexp.fltanget++;        /* bump rusage counters */	if (anon->u.an_page)		curproc->p_addr->u_stats.p_ru.ru_minflt++;	else		curproc->p_addr->u_stats.p_ru.ru_majflt++;	/* 	 * loop until we get it, or fail.	 */	while (1) {		we_own = FALSE;		/* TRUE if we set PG_BUSY on a page */		pg = anon->u.an_page;		/*		 * if there is a resident page and it is loaned, then anon		 * may not own it.   call out to uvm_anon_lockpage() to ensure		 * the real owner of the page has been identified and locked.		 */		if (pg && pg->loan_count)			pg = uvm_anon_lockloanpg(anon);		/*		 * page there?   make sure it is not busy/released.		 */		if (pg) {			/*			 * at this point, if the page has a uobject [meaning			 * we have it on loan], then that uobject is locked			 * by us!   if the page is busy, we drop all the			 * locks (including uobject) and try again.			 
*/			if ((pg->flags & (PG_BUSY|PG_RELEASED)) == 0) {				UVMHIST_LOG(maphist, "<- OK",0,0,0,0);				return (VM_PAGER_OK);			}			pg->flags |= PG_WANTED;			uvmexp.fltpgwait++;			/*			 * the last unlock must be an atomic unlock+wait on			 * the owner of page			 */			if (pg->uobject) {	/* owner is uobject ? */				uvmfault_unlockall(ufi, amap, NULL, anon);				UVMHIST_LOG(maphist, " unlock+wait on uobj",0,				    0,0,0);				UVM_UNLOCK_AND_WAIT(pg,				    &pg->uobject->vmobjlock,				    FALSE, "anonget1",0);			} else {				/* anon owns page */				uvmfault_unlockall(ufi, amap, NULL, NULL);				UVMHIST_LOG(maphist, " unlock+wait on anon",0,				    0,0,0);				UVM_UNLOCK_AND_WAIT(pg,&anon->an_lock,0,				    "anonget2",0);			}			/* ready to relock and try again */		} else {					/*			 * no page, we must try and bring it in.			 */			pg = uvm_pagealloc(NULL, 0, anon, 0);			if (pg == NULL) {		/* out of RAM.  */				uvmfault_unlockall(ufi, amap, NULL, anon);				uvmexp.fltnoram++;				UVMHIST_LOG(maphist, "  noram -- UVM_WAIT",0,				    0,0,0);				uvm_wait("flt_noram1");				/* ready to relock and try again */			} else {					/* we set the PG_BUSY bit */				we_own = TRUE;					uvmfault_unlockall(ufi, amap, NULL, anon);				/*				 * we are passing a PG_BUSY+PG_FAKE+PG_CLEAN				 * page into the uvm_swap_get function with				 * all data structures unlocked.  note that				 * it is ok to read an_swslot here because				 * we hold PG_BUSY on the page.				 */				uvmexp.pageins++;				result = uvm_swap_get(pg, anon->an_swslot,				    PGO_SYNCIO);				/*				 * we clean up after the i/o below in the				 * "we_own" case				 */				/* ready to relock and try again */			}		}		/*		 * now relock and try again		 */		locked = uvmfault_relock(ufi);		if (locked && amap != NULL) {			amap_lock(amap);		}		if (locked || we_own)			simple_lock(&anon->an_lock);		/*		 * if we own the page (i.e. we set PG_BUSY), then we need		 * to clean up after the I/O. 
there are three cases to		 * consider:		 *   [1] page released during I/O: free anon and ReFault.		 *   [2] I/O not OK.   free the page and cause the fault 		 *       to fail.		 *   [3] I/O OK!   activate the page and sync with the		 *       non-we_own case (i.e. drop anon lock if not locked).		 */				if (we_own) {			if (pg->flags & PG_WANTED) {				/* still holding object lock */				wakeup(pg);				}			/* un-busy! */			pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);			UVM_PAGE_OWN(pg, NULL);			/* 			 * if we were RELEASED during I/O, then our anon is			 * no longer part of an amap.   we need to free the			 * anon and try again.			 */			if (pg->flags & PG_RELEASED) {				pmap_page_protect(pg, VM_PROT_NONE);				simple_unlock(&anon->an_lock);				uvm_anfree(anon);	/* frees page for us */				if (locked)					uvmfault_unlockall(ufi, amap, NULL,							   NULL);				uvmexp.fltpgrele++;				UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);				return (VM_PAGER_REFAULT);	/* refault! */			}			if (result != VM_PAGER_OK) {				KASSERT(result != VM_PAGER_PEND);				/* remove page from anon */				anon->u.an_page = NULL;				/*				 * remove the swap slot from the anon				 * and mark the anon as having no real slot.				 * don't free the swap slot, thus preventing				 * it from being used again.				 */				uvm_swap_markbad(anon->an_swslot, 1);				anon->an_swslot = SWSLOT_BAD;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?