mm.c
    return p;
}

struct page_info *
assign_new_domain_page(struct domain *d, unsigned long mpaddr)
{
    volatile pte_t *pte = lookup_alloc_domain_pte(d, mpaddr);

    if (!pte_none(*pte))
        return NULL;

    return __assign_new_domain_page(d, mpaddr, pte);
}

void __init
assign_new_domain0_page(struct domain *d, unsigned long mpaddr)
{
    volatile pte_t *pte;

    BUG_ON(d != dom0);
    pte = lookup_alloc_domain_pte(d, mpaddr);
    if (pte_none(*pte)) {
        struct page_info *p = __assign_new_domain_page(d, mpaddr, pte);
        if (p == NULL) {
            panic("%s: can't allocate page for dom0\n", __func__);
        }
    }
}

static unsigned long
flags_to_prot (unsigned long flags)
{
    unsigned long res = _PAGE_PL_PRIV | __DIRTY_BITS;

    res |= flags & ASSIGN_readonly ? _PAGE_AR_R: _PAGE_AR_RWX;
    res |= flags & ASSIGN_nocache ? _PAGE_MA_UC: _PAGE_MA_WB;
#ifdef CONFIG_XEN_IA64_TLB_TRACK
    res |= flags & ASSIGN_tlb_track ? _PAGE_TLB_TRACKING: 0;
#endif
    res |= flags & ASSIGN_pgc_allocated ? _PAGE_PGC_ALLOCATED: 0;
    res |= flags & ASSIGN_io ? _PAGE_IO: 0;

    return res;
}

/* map a physical address to the specified metaphysical addr */
// flags: currently only ASSIGN_readonly, ASSIGN_nocache, ASSIGN_tlb_track
// This is called by assign_domain_mmio_page(),
// so access to the pte is racy.
int
__assign_domain_page(struct domain *d,
                     unsigned long mpaddr, unsigned long physaddr,
                     unsigned long flags)
{
    volatile pte_t *pte;
    pte_t old_pte;
    pte_t new_pte;
    pte_t ret_pte;
    unsigned long prot = flags_to_prot(flags);

    pte = lookup_alloc_domain_pte(d, mpaddr);

    old_pte = __pte(0);
    new_pte = pfn_pte(physaddr >> PAGE_SHIFT, __pgprot(prot));
    ret_pte = ptep_cmpxchg_rel(&d->arch.mm, mpaddr, pte, old_pte, new_pte);
    if (pte_val(ret_pte) == pte_val(old_pte)) {
        smp_mb();
        return 0;
    }

    // Dom0 tried to map the real machine's I/O region, but failed.
    // It is very likely that dom0 won't boot correctly because
    // it can't access I/O, so complain here.
    if (flags & ASSIGN_nocache) {
        int warn = 0;

        if (pte_pfn(ret_pte) != (physaddr >> PAGE_SHIFT))
            warn = 1;
        else if (!(pte_val(ret_pte) & _PAGE_MA_UC)) {
            u32 type;
            u64 attr;

            warn = 1;

            /*
             * See
             * complete_dom0_memmap()
             * case EFI_RUNTIME_SERVICES_CODE:
             * case EFI_RUNTIME_SERVICES_DATA:
             * case EFI_ACPI_RECLAIM_MEMORY:
             * case EFI_ACPI_MEMORY_NVS:
             * case EFI_RESERVED_TYPE:
             *
             * Currently only EFI_RUNTIME_SERVICES_CODE is found,
             * so we suppress only the EFI_RUNTIME_SERVICES_CODE case.
             */
            type = efi_mem_type(physaddr);
            attr = efi_mem_attributes(physaddr);
            if (type == EFI_RUNTIME_SERVICES_CODE &&
                (attr & EFI_MEMORY_UC) && (attr & EFI_MEMORY_WB))
                warn = 0;
        }
        if (warn)
            printk("%s:%d WARNING can't assign page domain 0x%p id %d\n"
                   "\talready assigned pte_val 0x%016lx\n"
                   "\tmpaddr 0x%016lx physaddr 0x%016lx flags 0x%lx\n",
                   __func__, __LINE__,
                   d, d->domain_id, pte_val(ret_pte),
                   mpaddr, physaddr, flags);
    }

    return -EAGAIN;
}

/* get_page() and map a physical address to the specified metaphysical addr */
void
assign_domain_page(struct domain *d,
                   unsigned long mpaddr, unsigned long physaddr)
{
    struct page_info* page = mfn_to_page(physaddr >> PAGE_SHIFT);

    BUG_ON((physaddr & _PAGE_PPN_MASK) != physaddr);
    BUG_ON(page->count_info != (PGC_allocated | 1));
    set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
    // Because __assign_domain_page() installs the pte with release
    // semantics (ptep_cmpxchg_rel()), smp_mb() isn't needed here.
    (void)__assign_domain_page(d, mpaddr, physaddr,
                               ASSIGN_writable | ASSIGN_pgc_allocated);
}
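/*
 * Illustrative sketch, not part of the original file: what a caller of
 * __assign_domain_page() is expected to look like.  The ASSIGN_* flags
 * are purely additive; flags_to_prot() above folds them into the pte
 * protection bits.  Mapping one read-only, uncacheable frame would be
 * roughly (the mpaddr/physaddr values here are hypothetical):
 *
 *     if (__assign_domain_page(d, mpaddr, physaddr,
 *                              ASSIGN_readonly | ASSIGN_nocache) == -EAGAIN)
 *         ;  // lost the cmpxchg race: a pte was already installed there
 *
 * With those two flags, flags_to_prot() returns
 * _PAGE_PL_PRIV | __DIRTY_BITS | _PAGE_AR_R | _PAGE_MA_UC.
 */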
int
ioports_permit_access(struct domain *d, unsigned int fp, unsigned int lp)
{
    struct io_space *space;
    unsigned long mmio_start, mmio_end, mach_start;
    int ret;

    if (IO_SPACE_NR(fp) >= num_io_spaces) {
        dprintk(XENLOG_WARNING, "Unknown I/O Port range 0x%x - 0x%x\n",
                fp, lp);
        return -EFAULT;
    }

    /*
     * The ioport_cap rangeset tracks the I/O port address including
     * the port space ID.  This means port space IDs need to match
     * between Xen and dom0.  This is also a requirement because
     * the hypercall to pass these port ranges only uses a u32.
     *
     * NB - non-dom0 driver domains may only have a subset of the
     * I/O port spaces and thus will number port spaces differently.
     * This is ok, they don't make use of this interface.
     */
    ret = rangeset_add_range(d->arch.ioport_caps, fp, lp);
    if (ret != 0)
        return ret;

    space = &io_space[IO_SPACE_NR(fp)];

    /* Legacy I/O on dom0 is already set up */
    if (d == dom0 && space == &io_space[0])
        return 0;

    fp = IO_SPACE_PORT(fp);
    lp = IO_SPACE_PORT(lp);

    if (space->sparse) {
        mmio_start = IO_SPACE_SPARSE_ENCODING(fp) & ~PAGE_MASK;
        mmio_end = PAGE_ALIGN(IO_SPACE_SPARSE_ENCODING(lp));
    } else {
        mmio_start = fp & ~PAGE_MASK;
        mmio_end = PAGE_ALIGN(lp);
    }

    /*
     * The "machine first port" is not necessarily identity mapped
     * to the guest first port.  At least for the legacy range.
     */
    mach_start = mmio_start | __pa(space->mmio_base);

    if (space == &io_space[0]) {
        mmio_start |= IO_PORTS_PADDR;
        mmio_end |= IO_PORTS_PADDR;
    } else {
        mmio_start |= __pa(space->mmio_base);
        mmio_end |= __pa(space->mmio_base);
    }

    while (mmio_start <= mmio_end) {
        (void)__assign_domain_page(d, mmio_start, mach_start, ASSIGN_nocache);
        mmio_start += PAGE_SIZE;
        mach_start += PAGE_SIZE;
    }

    return 0;
}

static int
ioports_has_allowed(struct domain *d, unsigned int fp, unsigned int lp)
{
    for (; fp < lp; fp++)
        if (rangeset_contains_singleton(d->arch.ioport_caps, fp))
            return 1;

    return 0;
}
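/*
 * Worked example, not part of the original file, assuming the usual
 * Linux/ia64 sparse encoding IO_SPACE_SPARSE_ENCODING(p) ==
 * (((p) >> 2) << 12) | ((p) & 0xfff): each group of 4 consecutive port
 * numbers occupies its own 4KB-aligned slice of the MMIO offset space,
 * e.g. ports 0x0-0x3 land at offsets 0x0-0x3 and ports 0x4-0x7 at
 * offsets 0x1004-0x1007.  Mappings can therefore only be granted or
 * revoked with page granularity (IO_SPACE_SPARSE_PORTS_PER_PAGE ports
 * at a time), which is why ioports_deny_access() below must re-check
 * ioports_has_allowed() before unmapping a page that straddles the
 * requested port range.
 */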
int
ioports_deny_access(struct domain *d, unsigned int fp, unsigned int lp)
{
    int ret;
    struct mm_struct *mm = &d->arch.mm;
    unsigned long mmio_start, mmio_end, mmio_base;
    unsigned int fp_base, lp_base;
    struct io_space *space;

    if (IO_SPACE_NR(fp) >= num_io_spaces) {
        dprintk(XENLOG_WARNING, "Unknown I/O Port range 0x%x - 0x%x\n",
                fp, lp);
        return -EFAULT;
    }

    ret = rangeset_remove_range(d->arch.ioport_caps, fp, lp);
    if (ret != 0)
        return ret;

    space = &io_space[IO_SPACE_NR(fp)];
    fp_base = IO_SPACE_PORT(fp);
    lp_base = IO_SPACE_PORT(lp);

    if (space->sparse) {
        mmio_start = IO_SPACE_SPARSE_ENCODING(fp_base) & ~PAGE_MASK;
        mmio_end = PAGE_ALIGN(IO_SPACE_SPARSE_ENCODING(lp_base));
    } else {
        mmio_start = fp_base & ~PAGE_MASK;
        mmio_end = PAGE_ALIGN(lp_base);
    }

    if (space == &io_space[0] && d != dom0)
        mmio_base = IO_PORTS_PADDR;
    else
        mmio_base = __pa(space->mmio_base);

    for (; mmio_start < mmio_end; mmio_start += PAGE_SIZE) {
        unsigned int port, range;
        unsigned long mpaddr;
        volatile pte_t *pte;
        pte_t old_pte;

        if (space->sparse) {
            port = IO_SPACE_SPARSE_DECODING(mmio_start);
            range = IO_SPACE_SPARSE_PORTS_PER_PAGE - 1;
        } else {
            port = mmio_start;
            range = PAGE_SIZE - 1;
        }

        port |= IO_SPACE_BASE(IO_SPACE_NR(fp));

        if (port < fp || port + range > lp) {
            /* Maybe this covers an allowed port.  */
            if (ioports_has_allowed(d, port, port + range))
                continue;
        }

        mpaddr = mmio_start | mmio_base;
        pte = lookup_noalloc_domain_pte_none(d, mpaddr);
        BUG_ON(pte == NULL);
        BUG_ON(pte_none(*pte));

        /* clear pte */
        old_pte = ptep_get_and_clear(mm, mpaddr, pte);
    }
    domain_flush_vtlb_all(d);
    return 0;
}

static void
assign_domain_same_page(struct domain *d,
                        unsigned long mpaddr, unsigned long size,
                        unsigned long flags)
{
    //XXX optimization
    unsigned long end = PAGE_ALIGN(mpaddr + size);
    for (mpaddr &= PAGE_MASK; mpaddr < end; mpaddr += PAGE_SIZE) {
        (void)__assign_domain_page(d, mpaddr, mpaddr, flags);
    }
}

int
efi_mmio(unsigned long physaddr, unsigned long size)
{
    void *efi_map_start, *efi_map_end;
    u64 efi_desc_size;
    void* p;

    efi_map_start = __va(ia64_boot_param->efi_memmap);
    efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
    efi_desc_size = ia64_boot_param->efi_memdesc_size;

    for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
        efi_memory_desc_t* md = (efi_memory_desc_t *)p;
        unsigned long start = md->phys_addr;
        unsigned long end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);

        if (start <= physaddr && physaddr < end) {
            if ((physaddr + size) > end) {
                gdprintk(XENLOG_INFO, "%s: physaddr 0x%lx size = 0x%lx\n",
                         __func__, physaddr, size);
                return 0;
            }

            // for io space
            if (md->type == EFI_MEMORY_MAPPED_IO ||
                md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
                return 1;
            }

            // for runtime
            // see efi_enter_virtual_mode(void)
            // in linux/arch/ia64/kernel/efi.c
            if ((md->attribute & EFI_MEMORY_RUNTIME) &&
                !(md->attribute & EFI_MEMORY_WB)) {
                return 1;
            }

            return 0;
        }

        if (physaddr < start) {
            break;
        }
    }
    return 1;
}

unsigned long
assign_domain_mmio_page(struct domain *d, unsigned long mpaddr,
                        unsigned long phys_addr, unsigned long size,
                        unsigned long flags)
{
    unsigned long addr = mpaddr & PAGE_MASK;
    unsigned long end = PAGE_ALIGN(mpaddr + size);

    if (size == 0) {
        gdprintk(XENLOG_INFO, "%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
                 __func__, d, mpaddr, size);
    }
    if (!efi_mmio(phys_addr, size)) {
#ifndef NDEBUG
        gdprintk(XENLOG_INFO, "%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
                 __func__, d, mpaddr, size);
#endif
        return -EINVAL;
    }

    for (phys_addr &= PAGE_MASK; addr < end;
         addr += PAGE_SIZE, phys_addr += PAGE_SIZE) {
        __assign_domain_page(d, addr, phys_addr, flags);
    }
    return mpaddr;
}

unsigned long
assign_domain_mach_page(struct domain *d,
                        unsigned long mpaddr, unsigned long size,
                        unsigned long flags)
{
    BUG_ON(flags & ASSIGN_pgc_allocated);
    assign_domain_same_page(d, mpaddr, size, flags);
    return mpaddr;
}

static void
adjust_page_count_info(struct page_info* page)
{
    struct domain* d = page_get_owner(page);

    BUG_ON((page->count_info & PGC_count_mask) != 1);
    if (d != NULL) {
        int ret = get_page(page, d);
        BUG_ON(ret == 0);
    } else {
        u64 x, nx, y;

        y = *((u64*)&page->count_info);
        do {
            x = y;
            nx = x + 1;

            BUG_ON((x >> 32) != 0);
            BUG_ON((nx & PGC_count_mask) != 2);
            y = cmpxchg((u64*)&page->count_info, x, nx);
        } while (unlikely(y != x));
    }
}
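/*
 * Minimal standalone sketch, not part of the original file, of the
 * lock-free read-modify-write pattern adjust_page_count_info() uses for
 * ownerless pages: snapshot the counter, compute the new value, and let
 * cmpxchg() install it only if nobody changed the counter in between;
 * otherwise retry against the fresh value cmpxchg() returned.
 */
static inline u64
cmpxchg_increment_sketch(volatile u64 *counter)
{
    u64 x, y = *counter;
    do {
        x = y;                           /* value the update is based on */
        y = cmpxchg(counter, x, x + 1);  /* returns the value it saw     */
    } while (unlikely(y != x));          /* raced: retry with new value  */
    return x + 1;
}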
static void
domain_put_page(struct domain* d, unsigned long mpaddr,
                volatile pte_t* ptep, pte_t old_pte, int clear_PGC_allocate)
{
    unsigned long mfn = pte_pfn(old_pte);
    struct page_info* page = mfn_to_page(mfn);

    if (pte_pgc_allocated(old_pte)) {
        if (page_get_owner(page) == d || page_get_owner(page) == NULL) {
            BUG_ON(get_gpfn_from_mfn(mfn) != (mpaddr >> PAGE_SHIFT));
            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
        } else {
            BUG();
        }

        if (likely(clear_PGC_allocate)) {
            if (!test_and_clear_bit(_PGC_allocated, &page->count_info))
                BUG();
            /* put_page() is done by domain_page_flush_and_put() */
        } else {
            // In this case, the page reference count must not be touched.
            // domain_page_flush_and_put() decrements it, so we increment
            // it in advance.  This is the slow path.
            //
            // guest_remove_page(): owner = d, count_info = 1
            // memory_exchange(): owner = NULL, count_info = 1
            adjust_page_count_info(page);
        }
    }
    domain_page_flush_and_put(d, mpaddr, ptep, old_pte, page);
}
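/*
 * Hypothetical caller sketch, not part of the original file: to tear
 * down a single mapping, a caller first clears the pte atomically so no
 * new references can be created through it, and only then hands the old
 * pte to domain_put_page() to drop the page reference and the M2P entry.
 * lookup_noalloc_domain_pte() is assumed to exist alongside the _none
 * variant used above; mpaddr is a placeholder:
 *
 *     volatile pte_t *pte = lookup_noalloc_domain_pte(d, mpaddr);
 *     pte_t old_pte = ptep_get_and_clear(&d->arch.mm, mpaddr, pte);
 *     if (!pte_none(old_pte))
 *         domain_put_page(d, mpaddr, pte, old_pte, 1);
 */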