📄 mmu.c
字号:
/* * Memory mappings. Life was easier when 2G of memory was enough. * * The kernel memory starts at KZERO, with the text loaded at KZERO+1M * (9load sits under 1M during the load). The memory from KZERO to the * top of memory is mapped 1-1 with physical memory, starting at physical * address 0. All kernel memory and data structures (i.e., the entries stored * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000, * then the kernel can only have 256MB of memory for itself. * * The 256M below KZERO comprises three parts. The lowest 4M is the * virtual page table, a virtual address representation of the current * page table tree. The second 4M is used for temporary per-process * mappings managed by kmap and kunmap. The remaining 248M is used * for global (shared by all procs and all processors) device memory * mappings and managed by vmap and vunmap. The total amount (256M) * could probably be reduced somewhat if desired. The largest device * mapping is that of the video card, and even though modern video cards * have embarrassing amounts of memory, the video drivers only use one * frame buffer worth (at most 16M). Each is described in more detail below. * * The VPT is a 4M frame constructed by inserting the pdb into itself. * This short-circuits one level of the page tables, with the result that * the contents of second-level page tables can be accessed at VPT. * We use the VPT to edit the page tables (see mmu) after inserting them * into the page directory. It is a convenient mechanism for mapping what * might be otherwise-inaccessible pages. The idea was borrowed from * the Exokernel. * * The VPT doesn't solve all our problems, because we still need to * prepare page directories before we can install them. For that, we * use tmpmap/tmpunmap, which map a single page at TMPADDR. */#include "u.h"#include "../port/lib.h"#include "mem.h"#include "dat.h"#include "fns.h"#include "io.h"/* * Simple segment descriptors with no translation. 
*/#define DATASEGM(p) { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }#define EXECSEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }#define EXEC16SEGM(p) { 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }#define TSSSEGM(b,p) { ((b)<<16)|sizeof(Tss),\ ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }Segdesc gdt[NGDT] ={[NULLSEG] { 0, 0}, /* null descriptor */[KDSEG] DATASEGM(0), /* kernel data/stack */[KESEG] EXECSEGM(0), /* kernel code */[UDSEG] DATASEGM(3), /* user data/stack */[UESEG] EXECSEGM(3), /* user code */[TSSSEG] TSSSEGM(0,0), /* tss segment */[KESEG16] EXEC16SEGM(0), /* kernel code 16-bit */};static int didmmuinit;static void taskswitch(ulong, ulong);static void memglobal(void);#define vpt ((ulong*)VPT)#define VPTX(va) (((ulong)(va))>>12)#define vpd (vpt+VPTX(VPT))voidmmuinit0(void){ memmove(m->gdt, gdt, sizeof gdt);}voidmmuinit(void){ ulong x, *p; ushort ptr[3]; didmmuinit = 1; if(0) print("vpt=%#.8ux vpd=%#.8lux kmap=%#.8ux\n", VPT, (ulong)vpd, KMAP); memglobal(); m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID; m->tss = malloc(sizeof(Tss)); memset(m->tss, 0, sizeof(Tss)); m->tss->iomap = 0xDFFF<<16; /* * We used to keep the GDT in the Mach structure, but it * turns out that that slows down access to the rest of the * page. Since the Mach structure is accessed quite often, * it pays off anywhere from a factor of 1.25 to 2 on real * hardware to separate them (the AMDs are more sensitive * than Intels in this regard). Under VMware it pays off * a factor of about 10 to 100. 
*/ memmove(m->gdt, gdt, sizeof gdt); x = (ulong)m->tss; m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss); m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP; ptr[0] = sizeof(gdt)-1; x = (ulong)m->gdt; ptr[1] = x & 0xFFFF; ptr[2] = (x>>16) & 0xFFFF; lgdt(ptr); ptr[0] = sizeof(Segdesc)*256-1; x = IDTADDR; ptr[1] = x & 0xFFFF; ptr[2] = (x>>16) & 0xFFFF; lidt(ptr); /* make kernel text unwritable */ for(x = KTZERO; x < (ulong)etext; x += BY2PG){ p = mmuwalk(m->pdb, x, 2, 0); if(p == nil) panic("mmuinit"); *p &= ~PTEWRITE; } taskswitch(PADDR(m->pdb), (ulong)m + BY2PG); ltr(TSSSEL);}/* * On processors that support it, we set the PTEGLOBAL bit in * page table and page directory entries that map kernel memory. * Doing this tells the processor not to bother flushing them * from the TLB when doing the TLB flush associated with a * context switch (write to CR3). Since kernel memory mappings * are never removed, this is safe. (If we ever remove kernel memory * mappings, we can do a full flush by turning off the PGE bit in CR4, * writing to CR3, and then turning the PGE bit back on.) * * See also mmukmap below. * * Processor support for the PTEGLOBAL bit is enabled in devarch.c. */static voidmemglobal(void){ int i, j; ulong *pde, *pte; /* only need to do this once, on bootstrap processor */ if(m->machno != 0) return; if(!m->havepge) return; pde = m->pdb; for(i=PDX(KZERO); i<1024; i++){ if(pde[i] & PTEVALID){ pde[i] |= PTEGLOBAL; if(!(pde[i] & PTESIZE)){ pte = KADDR(pde[i]&~(BY2PG-1)); for(j=0; j<1024; j++) if(pte[j] & PTEVALID) pte[j] |= PTEGLOBAL; } } } }/* * Flush all the user-space and device-mapping mmu info * for this process, because something has been deleted. * It will be paged back in on demand. */voidflushmmu(void){ int s; s = splhi(); up->newtlb = 1; mmuswitch(up); splx(s);}/* * Flush a single page mapping from the tlb. */voidflushpg(ulong va){ if(X86FAMILY(m->cpuidax) >= 4) invlpg(va); else putcr3(getcr3());} /* * Allocate a new page for a page directory. 
* We keep a small cache of pre-initialized * page directories in each mach. */static Page*mmupdballoc(void){ int s; Page *page; ulong *pdb; s = splhi(); m->pdballoc++; if(m->pdbpool == 0){ spllo(); page = newpage(0, 0, 0); page->va = (ulong)vpd; splhi(); pdb = tmpmap(page); memmove(pdb, m->pdb, BY2PG); pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID; /* set up VPT */ tmpunmap(pdb); }else{ page = m->pdbpool; m->pdbpool = page->next; m->pdbcnt--; } splx(s); return page;}static voidmmupdbfree(Proc *proc, Page *p){ if(islo()) panic("mmupdbfree: islo"); m->pdbfree++; if(m->pdbcnt >= 10){ p->next = proc->mmufree; proc->mmufree = p; }else{ p->next = m->pdbpool; m->pdbpool = p; m->pdbcnt++; }}/* * A user-space memory segment has been deleted, or the * process is exiting. Clear all the pde entries for user-space * memory mappings and device mappings. Any entries that * are needed will be paged back in as necessary. */static voidmmuptefree(Proc* proc){ int s; ulong *pdb; Page **last, *page; if(proc->mmupdb == nil || proc->mmuused == nil) return; s = splhi(); pdb = tmpmap(proc->mmupdb); last = &proc->mmuused; for(page = *last; page; page = page->next){ pdb[page->daddr] = 0; last = &page->next; } tmpunmap(pdb); splx(s); *last = proc->mmufree; proc->mmufree = proc->mmuused; proc->mmuused = 0;}static voidtaskswitch(ulong pdb, ulong stack){ Tss *tss; tss = m->tss; tss->ss0 = KDSEL; tss->esp0 = stack; tss->ss1 = KDSEL; tss->esp1 = stack; tss->ss2 = KDSEL; tss->esp2 = stack; putcr3(pdb);}voidmmuswitch(Proc* proc){ ulong *pdb; if(proc->newtlb){ mmuptefree(proc); proc->newtlb = 0; } if(proc->mmupdb){ pdb = tmpmap(proc->mmupdb); pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)]; tmpunmap(pdb); taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK)); }else taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));}/* * Release any pages allocated for a page directory base or page-tables * for this process: * switch to the prototype pdb for this processor (m->pdb); * call mmuptefree() to place all 
pages used for page-tables (proc->mmuused)
 *   onto the process' free list (proc->mmufree). This has the side-effect of
 *   cleaning any user entries in the pdb (proc->mmupdb);
 *   if there's a pdb put it in the cache of pre-initialised pdb's
 *   for this processor (m->pdbpool) or on the process' free list;
 *   finally, place any pages freed back into the free pool (palloc).
 * This routine is only called from schedinit() with palloc locked.
 *
 * Panics if islo() is true on entry, if a kmaptable exists without
 * a pdb, if the kmaptable or any freed page still has references
 * after its count is decremented, if kmaps are still outstanding
 * (nkmap), or if the KMAP pde does not point at the kmaptable.
 */
void
mmurelease(Proc* proc)
{
	Page *page, *next;
	ulong *pdb;

	if(islo())
		panic("mmurelease: islo");
	/* get off proc's pdb before it is dismantled */
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	if(proc->kmaptable){
		if(proc->mmupdb == nil)
			panic("mmurelease: no mmupdb");
		/* the reference dropped here must be the last one */
		if(--proc->kmaptable->ref)
			panic("mmurelease: kmap ref %d\n", proc->kmaptable->ref);
		if(proc->nkmap)
			panic("mmurelease: nkmap %d\n", proc->nkmap);
		/*
		 * remove kmaptable from pdb before putting pdb up for reuse.
		 */
		pdb = tmpmap(proc->mmupdb);
		if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
			panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
				pdb[PDX(KMAP)], proc->kmaptable->pa);
		pdb[PDX(KMAP)] = 0;
		tmpunmap(pdb);
		/*
		 * move kmaptable to free list.
		 */
		pagechainhead(proc->kmaptable);
		proc->kmaptable = 0;
	}
	if(proc->mmupdb){
		/* page-table pages go to proc->mmufree, then the pdb is cached or freed */
		mmuptefree(proc);
		mmupdbfree(proc, proc->mmupdb);
		proc->mmupdb = 0;
	}
	/* next is saved first: pagechainhead takes over the page */
	for(page = proc->mmufree; page; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		pagechainhead(page);
	}
	/* NOTE(review): palloc.r.p presumably marks a process waiting for pages - confirm */
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}

/*
 * Allocate and install pdb for the current process.
 * Does nothing if up already has one.  The new directory gets
 * this processor's MACHADDR entry and is loaded into CR3.
 */
static void
upallocpdb(void)
{
	int s;
	ulong *pdb;
	Page *page;

	if(up->mmupdb != nil)
		return;
	/* mmupdballoc may lower the priority level while it allocates */
	page = mmupdballoc();
	s = splhi();
	if(up->mmupdb != nil){
		/*
		 * Perhaps we got an interrupt while
		 * mmupdballoc was sleeping and that
		 * interrupt allocated an mmupdb?
		 * Seems unlikely.
		 */
		mmupdbfree(up, page);
		splx(s);
		return;
	}
	pdb = tmpmap(page);
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	tmpunmap(pdb);
	up->mmupdb = page;
	putcr3(up->mmupdb->pa);
	splx(s);
}

/*
 * Update the mmu in response to a user fault.  pa may have PTEWRITE set.
 *
 * va is the faulting virtual address and pa the value to install
 * (PTEUSER and PTEVALID are or'ed in here).  The third parameter
 * is unnamed and unused.  Allocates a pdb for the current process
 * if it has none, and a page-table page if none yet covers va.
 */
void
putmmu(ulong va, ulong pa, Page*)
{
	int old, s;
	Page *page;

	if(up->mmupdb == nil)
		upallocpdb();

	/*
	 * We should be able to get through this with interrupts
	 * turned on (if we get interrupted we'll just pick up
	 * where we left off) but we get many faults accessing
	 * vpt[] near the end of this function, and they always happen
	 * after the process has been switched out and then
	 * switched back, usually many times in a row (perhaps
	 * it cannot switch back successfully for some reason).
	 *
	 * In any event, I'm tired of searching for this bug.
	 * Turn off interrupts during putmmu even though
	 * we shouldn't need to.		- rsc
	 */

	s = splhi();
	if(!(vpd[PDX(va)]&PTEVALID)){
		/* no page table covers va yet: reuse a free one or allocate */
		if(up->mmufree == 0){
			spllo();
			page = newpage(0, 0, 0);
			splhi();
		}
		else{
			page = up->mmufree;
			up->mmufree = page->next;
		}
		vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* page is now mapped into the VPT - clear it */
		memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
		/* remember the pde slot so mmuptefree can clear it later */
		page->daddr = PDX(va);
		page->next = up->mmuused;
		up->mmuused = page;
	}
	old = vpt[VPTX(va)];
	vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
	/* flush only when a previously valid entry was replaced */
	if(old&PTEVALID)
		flushpg(va);
	/* diagnostic: CR3 should still hold this process' pdb */
	if(getcr3() != up->mmupdb->pa)
		print("bad cr3 %.8lux %.8lux\n", getcr3(), up->mmupdb->pa);
	splx(s);
}

/*
 * Double-check the user MMU.
 * Error checking only.
 *
 * Does nothing if the current process has no pdb or if va has no
 * valid pde and pte.  Otherwise prints a diagnostic when the page
 * number in va's pte differs from pa.
 */
void
checkmmu(ulong va, ulong pa)
{
	if(up->mmupdb == 0)
		return;
	if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
		return;
	if(PPN(vpt[VPTX(va)]) != pa)
		print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%08lux\n",
			up->pid, up->text,
			va, pa, vpt[VPTX(va)]);
}

/*
 * Walk the page-table pointed to by pdb and return a pointer
 * to the entry for virtual address va at the requested level.
 * If the entry is invalid and create isn't requested then bail
 * out early.
Otherwise, for the 2nd level walk, allocate a new * page-table page and register it in the 1st level. This is used * only to edit kernel mappings, which use pages from kernel memory, * so it's okay to use KADDR to look at the tables. */ulong*mmuwalk(ulong* pdb, ulong va, int level, int create){ ulong *table; void *map; table = &pdb[PDX(va)]; if(!(*table & PTEVALID) && create == 0) return 0; switch(level){ default: return 0; case 1: return table; case 2: if(*table & PTESIZE) panic("mmuwalk2: va %luX entry %luX\n", va, *table); if(!(*table & PTEVALID)){ /* * Have to call low-level allocator from * memory.c if we haven't set up the xalloc * tables yet. */ if(didmmuinit) map = xspanalloc(BY2PG, BY2PG, 0); else map = rampage(); if(map == nil) panic("mmuwalk xspanalloc failed"); *table = PADDR(map)|PTEWRITE|PTEVALID; } table = KADDR(PPN(*table)); return &table[PTX(va)]; }}/*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -