pagefault.cxx
/*
 * Copyright (C) 1998, 1999, 2001, Jonathan S. Shapiro.
 *
 * This file is part of the EROS Operating System.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/* Drivers for 386 protection faults */

#include <kerninc/kernel.hxx>
#include <eros/ProcessState.h>
#include <eros/memory.h>
#include <eros/SysTraceKey.h>
#include <kerninc/Check.hxx>
#include <kerninc/Thread.hxx>
#include <kerninc/Node.hxx>
#include <kerninc/ObjectCache.hxx>
#include <kerninc/Depend.hxx>
#include <kerninc/Machine.hxx>
#include <kerninc/Debug.hxx>
#include <kerninc/util.h>
#include <kerninc/Process.hxx>
#include <kerninc/SegWalk.hxx>
#include <arch-kerninc/Process.hxx>
#include <arch-kerninc/PTE.hxx>
#include "IDT.hxx"
#include "lostart.hxx"
#include "Segment.hxx"

#define dbg_pgflt  0x1  /* steps in taking snapshot */

/* Following should be an OR of some of the above */
#define dbg_flags  ( 0u )

#define DEBUG(x) if (dbg_##x & dbg_flags)

/* Possible outcomes of a user-level page fault:
 *
 * 1. Fault was due to a not-present page, and address is not valid in
 *    address space segment. Domain should be faulted and an attempt
 *    made to invoke appropriate keeper.
 *
 * 2. Fault was due to a not-present page, but address IS valid in
 *    address space segment. Segment indicates access is valid.
 *    Construct the PDE and PTE for this mapping. If the object
 *    referenced is hazarded, construct a write-hazard mapping even if
 *    the object is in principle writable. Restart the process.
 *
 * 3. Fault was an access violation due to writing a page that in
 *    principle was read-only. Domain should be faulted and an attempt
 *    made to invoke appropriate keeper.
 *
 * 4. Fault was an access violation due to writing a page that has a
 *    write-hazarded mapping. Two cases: if the hazard still applies,
 *    go to sleep on the hazarded object; else validate the mapping
 *    for writing and restart the operation.
 *
 * EROS does not provide write-only or execute-only mappings.
 *
 * Meaning of the error code value for a page fault:
 *
 *   Bit   Value   Meaning
 *    0      0     Not-present page
 *    0      1     Page-level protection violation
 *    1      0     Access was a read
 *    1      1     Access was a write
 *    2      0     Access was supervisor-mode
 *    2      1     Access was user-mode
 */
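/* The block below is an illustrative sketch, not part of the original
 * source: it restates the error-code table above in executable form,
 * using the same bit tests that PageFault() applies later in this
 * file. The function name DecodeFaultError is hypothetical.
 */
#if 0
static void
DecodeFaultError(uint32_t error)
{
  bool protViolation = (error & 1) ? true : false;  /* else not-present */
  bool writeAccess   = (error & 2) ? true : false;  /* else read */
  bool userMode      = (error & 4) ? true : false;  /* else supervisor */

  printf("pgflt: %s on %s access from %s mode\n",
         protViolation ? "protection violation" : "not-present page",
         writeAccess ? "write" : "read",
         userMode ? "user" : "supervisor");
}
#endif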
extern "C" {
  extern uint32_t CpuType;
#ifndef NDEBUG
  extern void end();
  extern void start();
#endif
};

bool PteZapped = false;

static ObjectHeader *proc_MakeNewPageTable(SegWalk& wi, uint32_t ndx);
static ObjectHeader *proc_MakeNewPageDirectory(SegWalk& wi);

#ifdef OPTION_DDB
void
PTE::ddb_dump()
{
  extern void db_printf(const char *fmt, ...);

  char attrs[64];
  char *nxtAttr = attrs;

  printf("Pg Frame 0x%08x [", PageFrame());

#define ADDATTR(s) do { const char *sp = (s); *nxtAttr++ = ','; while (*sp) *nxtAttr++ = *sp++; } while (0)

  if (PTE_IS(*this, PTE_V))
    ADDATTR("V");
  else
    ADDATTR("!V");

  if (PTE_IS(*this, PTE_W))
    ADDATTR("W");

  if (PTE_IS(*this, PTE_USER))
    ADDATTR("U");
  else
    ADDATTR("S");

  if (PTE_IS(*this, PTE_ACC))
    ADDATTR("A");

  if (PTE_IS(*this, PTE_DRTY))
    ADDATTR("D");

  if (PTE_IS(*this, PTE_PGSZ))
    ADDATTR("L");

  if (PTE_IS(*this, PTE_GLBL))
    ADDATTR("G");

  if (PTE_IS(*this, PTE_WT))
    ADDATTR("WT");
  else
    ADDATTR("!WT");

  if (PTE_IS(*this, PTE_CD))
    ADDATTR("CD");
#undef ADDATTR

  *nxtAttr++ = 0;

  printf("%s]\n", attrs);
}
#endif
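/* Illustrative note, not from the original source: for a valid,
 * writable, user-mode page that has been accessed (and with
 * write-through clear), ddb_dump() above would print something like
 *
 *   Pg Frame 0x00123000 [,V,W,U,A,!WT]
 *
 * where the frame address is a made-up example. The leading comma
 * appears because ADDATTR unconditionally emits a ',' before each
 * attribute, including the first.
 */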
void
Depend_InvalidateProduct(ObjectHeader* page)
{
  /* InvalidateProduct is always called after the producing Node has
   * been unprepared (and thus zapped). If this is a page table, we
   * therefore know that all of its entries are dead.
   *
   * If this is a page directory, however, we must find all of the
   * mapping table pointers that name it and zap those.
   */
#if 0
  dprintf(true, "NAILING product 0x%08x\n", page);
#endif

  assert (page->obType == ObType::PtMappingPage);

  kpa_t mp_pa = VTOP(ObjectCache::ObHdrToPage(page));

  /* MUST BE CAREFUL -- if this product is the active mapping table
   * we need to reset the kernel map to the native kernel map!
   */
  kpa_t curmap = Machine::GetMappingTable();

  if (mp_pa == curmap) {
#if 0
    dprintf(true, "Nailing active mapping table!\n");
#endif
    Machine::SetMappingTable(KERNPAGEDIR);
  }

  if (page->producerNdx == EROS_NODE_LGSIZE) {
    for (uint32_t i = 0; i < KTUNE_NCONTEXT; i++)
      if (Process::ContextCache[i].fixRegs.MappingTable == mp_pa)
        Process::ContextCache[i].fixRegs.MappingTable = 0;
  }

  Machine::FlushTLB();
}

#ifndef NDEBUG
bool
PTE::ObIsNotWritable(ObjectHeader *pObj)
{
  bool result = true;

  /* Start by building a writable PTE for the page: */
  uint32_t kvaw = ObjectCache::ObHdrToPage(pObj);
  kvaw |= PTE_W;

#ifdef OPTION_SMALL_SPACES
  /* Check small spaces first: */
  uint32_t nFrames = KTUNE_NCONTEXT / 32;
  PTE *pte = Process::smallSpaces;

  for (unsigned i = 0; i < nFrames * NPTE_PER_PAGE; i++) {
    if ((pte[i].w_value & (PTE_FRAMEBITS|PTE_W)) == kvaw) {
      dprintf(true, "Checking pobj 0x%x with frame at 0x%x\n"
              "Pg hdr 0x%x retains writable small PTE at 0x%x\n",
              pObj, kvaw, pObj, &pte[i]);
      result = false;
    }
  }
#endif

  for (uint32_t pf = 0; pf < ObjectCache::TotalPages(); pf++) {
    ObjectHeader *pHdr = ObjectCache::GetCorePageFrame(pf);

    if (pHdr->obType != ObType::PtMappingPage)
      continue;

    PTE *ptepg = (PTE *) ObjectCache::ObHdrToPage(pHdr);

    uint32_t limit = NPTE_PER_PAGE;
    if (pHdr->producerNdx == EROS_NODE_LGSIZE)
      limit = (UMSGTOP >> 22);  /* PAGE_ADDR_BITS + PAGE_TABLE_BITS */

    for (uint32_t ent = 0; ent < limit; ent++) {
      if ((ptepg[ent].w_value & (PTE_FRAMEBITS|PTE_W)) == kvaw) {
        dprintf(true, "Checking pobj 0x%x with frame at 0x%x\n"
                "Page hdr 0x%x retains writable PTE at 0x%x\n",
                pObj, kvaw, pHdr, &ptepg[ent]);
        result = false;
      }
    }
  }

  return result;
}
#endif /* !NDEBUG */

void
Depend_WriteDisableProduct(ObjectHeader *pObj)
{
  /* This is trickier than the case above, since we must not
   * write-disable kernel mappings. On the other hand, we don't need
   * to worry about blasting the current mapping table either.
   */
  assert (pObj->obType == ObType::PtMappingPage);

  kva_t mp_va = ObjectCache::ObHdrToPage(pObj);

  /* Each mapping table holds 1024 entries, but the uppermost portion
   * of the address space is reserved for the kernel and the small
   * spaces. Kernel pages should not be write disabled, nor should
   * the small space directory entries (small space page table entries
   * are write disabled as a special case at the page table level).
   */
  uint32_t limit = NPTE_PER_PAGE;
  if (pObj->producerNdx == EROS_NODE_LGSIZE)
    limit = (UMSGTOP >> 22);  /* PAGE_ADDR_BITS + PAGE_TABLE_BITS */

  PTE *pte = (PTE*) mp_va;
  for (uint32_t entry = 0; entry < limit; entry++)
    pte[entry].WriteProtect();
}
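/* Illustrative sketch, not part of the original source: the
 * (UMSGTOP >> 22) expression used in ObIsNotWritable() and
 * Depend_WriteDisableProduct() above counts user-visible page
 * directory entries. Each PDE covers 4 MB (1 << 22 bytes, the
 * PAGE_ADDR_BITS + PAGE_TABLE_BITS noted in the comments), so
 * shifting the top of the user region right by 22 yields the number
 * of PDEs below it. The UMSGTOP value here is a made-up example, not
 * the real constant.
 */
#if 0
const uint32_t exampleUMSGTOP = 0xC0000000u;          /* hypothetical */
const uint32_t userPdeCount = exampleUMSGTOP >> 22;   /* == 768 of 1024 */
#endif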
#ifdef INVOKE_TIMING
extern "C" {
  uint64_t rdtsc();
};
#endif

bool
PageFault(fixregs_t *sa)
{
#ifdef OPTION_KERN_TIMING_STATS
  uint64_t top_time = rdtsc();
#ifdef OPTION_KERN_EVENT_TRACING
  uint64_t top_cnt0 = Machine::ReadCounter(0);
  uint64_t top_cnt1 = Machine::ReadCounter(1);
#endif
#endif

  ula_t la = sa->ExceptAddr;
  uint32_t error = sa->Error;
  bool writeAccess = (error & 2) ? true : false;

  /* sa->Dump(); */

  /* If we page faulted from supervisor mode it's trivial: */
  if ( (error & 4) == 0 ) {
#if 0
    if ( (sa->EIP == (uint32_t) ipc_probe_start) ||
         (sa->EIP == (uint32_t) ipc_probe_top) ) {
      printf("Send mapping fault 0\n");
      sa->EAX = 1;
      sa->EIP = (uint32_t) ipc_probe_end;
      return;
    }
#endif
#if 0
    sa->Dump();
#endif
    fatal("Kernel page fault\n"
          "  SaveArea = 0x%08x  EIP = 0x%08x\n"
          "  Fault address = 0x%08x  Code = 0x%08x\n"
          "  CS = 0x%08x\n",
          sa, sa->EIP, la, error, sa->CS);
  }

  /* Domain page fault. If we take this from kernel mode we are in
   * deep kimchee.
   */
  Process* ctxt = (Process*) Thread::CurContext();

  assert(& ctxt->fixRegs == sa);

  ctxt->stats.pfCount++;

  KernStats.nPfTraps++;
  if (writeAccess)
    KernStats.nPfAccess++;

  PteZapped = false;

  ObjectHeader::BeginTransaction();

  (void) proc_DoPageFault(ctxt, la, writeAccess, false);

  /* We succeeded (wonder of wonders) -- release pinned resources. */
  ObjectHeader::ReleasePinnedObjects();

  /* No need to release uncommitted I/O page frames -- there should
   * not be any.
   */
  assert(Thread::CurContext());

#ifdef OPTION_KERN_TIMING_STATS
  {
    extern uint64_t pf_delta_cy;

    uint64_t bot_time = rdtsc();
#ifdef OPTION_KERN_EVENT_TRACING
    extern uint64_t pf_delta_cnt0;
    extern uint64_t pf_delta_cnt1;

    uint64_t bot_cnt0 = Machine::ReadCounter(0);
    uint64_t bot_cnt1 = Machine::ReadCounter(1);

    pf_delta_cnt0 += (bot_cnt0 - top_cnt0);
    pf_delta_cnt1 += (bot_cnt1 - top_cnt1);
#endif
    pf_delta_cy += (bot_time - top_time);
  }
#endif

  return false;
}

#define DATA_PAGE_FLAGS (PTE_ACC|PTE_USER|PTE_V)
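/* Illustrative sketch, not part of the original source: the PTE value
 * that proc_DoSmallPageFault() constructs below is the physical frame
 * address with DATA_PAGE_FLAGS (and PTE_W for write faults) OR'd into
 * the low bits. MakeDataPteValue is a hypothetical name; the real
 * code applies PTE_SET directly to a live entry.
 */
#if 0
static uint32_t
MakeDataPteValue(kpa_t pageAddr, bool isWrite)
{
  uint32_t value = (pageAddr & PTE_FRAMEBITS) | DATA_PAGE_FLAGS;
  if (isWrite)
    value |= PTE_W;
  return value;
}
#endif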
#ifdef OPTION_SMALL_SPACES
bool
proc_DoSmallPageFault(Process * p, ula_t la, bool isWrite, bool prompt)
{
#if 0
  dprintf(true, "SmallPgFlt w/ la=0x%08x, bias=0x%08x,"
          " limit=0x%08x\n", la, p->bias, p->limit);
#endif

  /* Address is a linear address. Subtract the base and check the
   * bound.
   */
  uva_t va = la - p->bias;

#if 0
  /* This assertion can be false given that the process can run from
   * any address space and the save logic may subsequently save that
   * address space pointer. The code is preserved here to keep a
   * record of the fact that this may be untrue so that I do not
   * forget and re-insert the assertion.
   */
  assert ( fixRegs.MappingTable == KERNPAGEDIR );
#endif

  SegWalk wi;
  wi.faultCode = FC_NoFault;
  wi.traverseCount = 0;
  wi.segObj = 0;
  wi.vaddr = va;
  wi.frameBits = EROS_PAGE_ADDR_BITS;
  wi.writeAccess = isWrite;
  wi.invokeKeeperOK = !prompt;
  wi.invokeProcessKeeperOK = !prompt;
  wi.wantLeafNode = false;

  segwalk_init(wi, p->GetSegRoot());

  PTE& thePTE = p->smallPTE[(va >> EROS_PAGE_ADDR_BITS) % SMALL_SPACE_PAGES];

  if (PTE_ISNOT(thePTE, PTE_V))
    thePTE.w_value = PTE_IN_PROGRESS;

  /* If the virtual address falls outside the small range and is
   * valid, this walk will result in depend entries that blast the
   * wrong PTE. No real harm can come of that, since either the
   * address is bad or we will in that case be switching to a large
   * space anyway.
   */

  /* Do the traversal... */
  if ( !proc_WalkSeg(p, wi, EROS_PAGE_BLSS, &thePTE, 0, false) ) {
    p->SetFault(wi.faultCode, va, false);
    return false;
  }

  /* If the wrong dependency entry was reclaimed, we may have lost
   * one of the depend entries for the PTE that is under construction,
   * in which case we must yield and retry. This is low-likelihood.
   */
  if (thePTE.w_value == PTE_ZAPPED)
    Thread::Current()->Yield();

  /* If we get this far, there is a valid translation at this address,
   * but if the address exceeds the small space limit we need to
   * convert the current address space into a large space.
   *
   * If this is necessary, set smallPTE to zero and YIELD, because we
   * might have been called from inside the IPC code. This will
   * induce another page fault, which will follow the large spaces
   * path.
   */

  /* This should not happen on the invoker side, where the addresses
   * were validated during page probe (causing SS fault or GP fault).
   * On the invokee side, however, it is possible that we forced the
   * invokee to reload, in which case it loaded as a small space, and
   * we then tried to validate the receive address. In that case,
   * PopulateExitBlock() called us here with an out-of-bounds virtual
   * address. We reset the process mapping state and Yield(),
   * allowing the correct computation to be done in the next pass.
   */
  if (va >= p->limit) {
#if 0
    dprintf(true, "!! la=0x%08x va=0x%08x\n"
            "Switching process 0x%X to large space\n",
            la, va, procRoot->ob.oid);
#endif
    p->SwitchToLargeSpace();
    Thread::Current()->Yield();
  }

  kpa_t pageAddr = 0;
  ObjectHeader *pPageHdr = wi.segObj;

  if (isWrite)
    pPageHdr->MakeObjectDirty();

  pageAddr = VTOP(ObjectCache::ObHdrToPage(pPageHdr));

  assert ((pageAddr & EROS_PAGE_MASK) == 0);
  assert (pageAddr < PtoKPA(start) || pageAddr >= PtoKPA(end));
  assert (va < (SMALL_SPACE_PAGES * EROS_PAGE_SIZE));

  bool needInvalidate = false;

  if (isWrite && PTE_IS(thePTE, PTE_V)) {
    /* We are doing this because the old PTE had insufficient
     * permission, so we must zap the TLB.
     */
    needInvalidate = true;
  }

  thePTE.Invalidate();

  PTE_SET(thePTE, (pageAddr & PTE_FRAMEBITS) | DATA_PAGE_FLAGS);
  if (isWrite)
    PTE_SET(thePTE, PTE_W);

#ifdef WRITE_THROUGH
  if (CpuType >= 5)
    PTE_SET(thePTE, PTE_WT);
#endif

#if 0
  dprintf(true, "Built PTE at 0x%08x\n", &thePTE);
#endif

  if (needInvalidate)
    Machine::FlushTLB(la);

#ifdef DBG_WILD_PTR
  if (dbg_wild_ptr)
    Check::Consistency("End of DoSmallPageFault()");
#endif

  return true;
}
#endif /* OPTION_SMALL_SPACES */

uint32_t DoPageFault_CallCounter;

/* At some point, this logic will need to change to account for
 * background windows. In the event that we encounter a non-local
 * background window key we will need to do a complete traversal in
 * order to find the background segment, because the background
 * segment slot is not cached.
 *
 * Actually, this is contingent on a design distinction, which is
 * whether multiple red segments should be tracked on the way down the
 * segment tree. When we cross a KEPT red segment, we should
 * certainly forget any outstanding background segment -- we do not
 * want the red segment keeper to be able to fabricate a background
 * window key that might reference a segment over which the keeper