📄 memory.c
字号:
struct mm_struct *mm = vma->vm_mm; int retval; pte_t *pte; spinlock_t *ptl; retval = -EINVAL; if (PageAnon(page)) goto out; retval = -ENOMEM; flush_dcache_page(page); pte = get_locked_pte(mm, addr, &ptl); if (!pte) goto out; retval = -EBUSY; if (!pte_none(*pte)) goto out_unlock; /* Ok, finally just insert the thing.. */ get_page(page); inc_mm_counter(mm, file_rss); page_add_file_rmap(page); set_pte_at(mm, addr, pte, mk_pte(page, prot)); retval = 0; pte_unmap_unlock(pte, ptl); return retval;out_unlock: pte_unmap_unlock(pte, ptl);out: return retval;}/** * vm_insert_page - insert single page into user vma * @vma: user vma to map to * @addr: target user address of this page * @page: source kernel page * * This allows drivers to insert individual pages they've allocated * into a user vma. * * The page has to be a nice clean _individual_ kernel allocation. * If you allocate a compound page, you need to have marked it as * such (__GFP_COMP), or manually just split the page up yourself * (see split_page()). * * NOTE! Traditionally this was done with "remap_pfn_range()" which * took an arbitrary page protection parameter. This doesn't allow * that. Your vma protection will have to be set up correctly, which * means that if you want a shared writable mapping, you'd better * ask for a shared writable mapping! * * The page does not need to be reserved. */int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page){ if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; if (!page_count(page)) return -EINVAL; vma->vm_flags |= VM_INSERTPAGE; return insert_page(vma, addr, page, vma->vm_page_prot);}EXPORT_SYMBOL(vm_insert_page);static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t prot){ struct mm_struct *mm = vma->vm_mm; int retval; pte_t *pte, entry; spinlock_t *ptl; retval = -ENOMEM; pte = get_locked_pte(mm, addr, &ptl); if (!pte) goto out; retval = -EBUSY; if (!pte_none(*pte)) goto out_unlock; /* Ok, finally just insert the thing.. */ entry = pte_mkspecial(pfn_pte(pfn, prot)); set_pte_at(mm, addr, pte, entry); update_mmu_cache(vma, addr, entry); /* XXX: why not for insert_page? */ retval = 0;out_unlock: pte_unmap_unlock(pte, ptl);out: return retval;}/** * vm_insert_pfn - insert single pfn into user vma * @vma: user vma to map to * @addr: target user address of this page * @pfn: source kernel pfn * * Similar to vm_inert_page, this allows drivers to insert individual pages * they've allocated into a user vma. Same comments apply. * * This function should only be called from a vm_ops->fault handler, and * in that case the handler should return NULL. * * vma cannot be a COW mapping. * * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. */int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn){ int ret; pgprot_t pgprot = vma->vm_page_prot; /* * Technically, architectures with pte_special can avoid all these * restrictions (same for remap_pfn_range). However we would like * consistency in testing and feature parity among all, so we should * try to keep these invariants in place for everybody. */ BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == (VM_PFNMAP|VM_MIXEDMAP)); BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; if (track_pfn_vma_new(vma, &pgprot, pfn, PAGE_SIZE)) return -EINVAL; ret = insert_pfn(vma, addr, pfn, pgprot); if (ret) untrack_pfn_vma(vma, pfn, PAGE_SIZE); return ret;}EXPORT_SYMBOL(vm_insert_pfn);int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn){ BUG_ON(!(vma->vm_flags & VM_MIXEDMAP)); if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; /* * If we don't have pte special, then we have to use the pfn_valid() * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* * refcount the page if pfn_valid is true (hence insert_page rather * than insert_pfn). */ if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) { struct page *page; page = pfn_to_page(pfn); return insert_page(vma, addr, page, vma->vm_page_prot); } return insert_pfn(vma, addr, pfn, vma->vm_page_prot);}EXPORT_SYMBOL(vm_insert_mixed);/* * maps a range of physical memory into the requested pages. the old * mappings are removed. any references to nonexistent pages results * in null mappings (currently treated as "copy-on-access") */static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot){ pte_t *pte; spinlock_t *ptl; pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; arch_enter_lazy_mmu_mode(); do { BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); return 0;}static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot){ pmd_t *pmd; unsigned long next; pfn -= addr >> PAGE_SHIFT; pmd = pmd_alloc(mm, pud, addr); if (!pmd) return -ENOMEM; do { next = pmd_addr_end(addr, end); if (remap_pte_range(mm, pmd, addr, next, pfn + (addr >> PAGE_SHIFT), prot)) return -ENOMEM; } while (pmd++, addr = next, addr != end); return 0;}static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot){ pud_t *pud; unsigned long next; pfn -= addr >> PAGE_SHIFT; pud = pud_alloc(mm, pgd, addr); if (!pud) return -ENOMEM; do { next = pud_addr_end(addr, end); if (remap_pmd_range(mm, pud, addr, next, pfn + (addr >> PAGE_SHIFT), prot)) return -ENOMEM; } while (pud++, addr = next, addr != end); return 0;}/** * remap_pfn_range - remap kernel memory to userspace * @vma: user vma to map to * @addr: target user address to start at * @pfn: physical address of kernel memory * @size: size of map area * @prot: page protection flags for this mapping * * Note: this is only safe if the mm semaphore is held when called. */int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot){ pgd_t *pgd; unsigned long next; unsigned long end = addr + PAGE_ALIGN(size); struct mm_struct *mm = vma->vm_mm; int err; /* * Physically remapped pages are special. Tell the * rest of the world about it: * VM_IO tells people not to look at these pages * (accesses can have side effects). * VM_RESERVED is specified all over the place, because * in 2.4 it kept swapout's vma scan off this vma; but * in 2.6 the LRU scan won't even find its pages, so this * flag means no more than count its pages in reserved_vm, * and omit it from core dump, even when VM_IO turned off. * VM_PFNMAP tells the core MM that the base pages are just * raw PFN mappings, and do not have a "struct page" associated * with them. * * There's a horrible special case to handle copy-on-write * behaviour that some programs depend on. We mark the "original" * un-COW'ed pages by matching them up with "vma->vm_pgoff". */ if (addr == vma->vm_start && end == vma->vm_end) vma->vm_pgoff = pfn; else if (is_cow_mapping(vma->vm_flags)) return -EINVAL; vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size)); if (err) { /* * To indicate that track_pfn related cleanup is not * needed from higher level routine calling unmap_vmas */ vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP); return -EINVAL; } BUG_ON(addr >= end); pfn -= addr >> PAGE_SHIFT; pgd = pgd_offset(mm, addr); flush_cache_range(vma, addr, end); do { next = pgd_addr_end(addr, end); err = remap_pud_range(mm, pgd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) break; } while (pgd++, addr = next, addr != end); if (err) untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size)); return err;}EXPORT_SYMBOL(remap_pfn_range);static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, pte_fn_t fn, void *data){ pte_t *pte; int err; pgtable_t token; spinlock_t *uninitialized_var(ptl); pte = (mm == &init_mm) ? pte_alloc_kernel(pmd, addr) : pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; BUG_ON(pmd_huge(*pmd)); arch_enter_lazy_mmu_mode(); token = pmd_pgtable(*pmd); do { err = fn(pte, token, addr, data); if (err) break; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); if (mm != &init_mm) pte_unmap_unlock(pte-1, ptl); return err;}static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, pte_fn_t fn, void *data){ pmd_t *pmd; unsigned long next; int err; BUG_ON(pud_huge(*pud)); pmd = pmd_alloc(mm, pud, addr); if (!pmd) return -ENOMEM; do { next = pmd_addr_end(addr, end); err = apply_to_pte_range(mm, pmd, addr, next, fn, data); if (err) break; } while (pmd++, addr = next, addr != end); return err;}static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, pte_fn_t fn, void *data){ pud_t *pud; unsigned long next; int err; pud = pud_alloc(mm, pgd, addr); if (!pud) return -ENOMEM; do { next = pud_addr_end(addr, end); err = apply_to_pmd_range(mm, pud, addr, next, fn, data); if (err) break; } while (pud++, addr = next, addr != end); return err;}/* * Scan a region of virtual memory, filling in page tables as necessary * and calling a provided function on each leaf page table. */int apply_to_page_range(struct mm_struct *mm, unsigned long addr, unsigned long size, pte_fn_t fn, void *data){ pgd_t *pgd; unsigned long next; unsigned long start = addr, end = addr + size; int err; BUG_ON(addr >= end); mmu_notifier_invalidate_range_start(mm, start, end); pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); err = apply_to_pud_range(mm, pgd, addr, next, fn, data); if (err) break; } while (pgd++, addr = next, addr != end); mmu_notifier_invalidate_range_end(mm, start, end); return err;}EXPORT_SYMBOL_GPL(apply_to_page_range);/* * handle_pte_fault chooses page fault handler according to an entry * which was read non-atomically. Before making any commitment, on * those architectures or configurations (e.g. i386 with PAE) which * might give a mix of unmatched parts, do_swap_page and do_file_page * must check under lock before unmapping the pte and proceeding * (but do_wp_page is only called after already making such a check; * and do_anonymous_page and do_no_page can safely check later on). */static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, pte_t *page_table, pte_t orig_pte){ int same = 1;#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) if (sizeof(pte_t) > sizeof(unsigned long)) { spinlock_t *ptl = pte_lockptr(mm, pmd); spin_lock(ptl); same = pte_same(*page_table, orig_pte); spin_unlock(ptl); }#endif pte_unmap(page_table); return same;}/* * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when * servicing faults for write access. In the normal case, do always want * pte_mkwrite. But get_user_pages can cause write faults for mappings * that do not have writing enabled, when used by access_process_vm. */static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma){ if (likely(vma->vm_flags & VM_WRITE)) pte = pte_mkwrite(pte); return pte;}static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma){ /* * If the source page was a PFN mapping, we don't have * a "struct page" for it. We do a best-effort copy by * just copying from the original user address. If that * fails, we just zero-fill it. Live with it. */ if (unlikely(!src)) { void *kaddr = kmap_atomic(dst, KM_USER0); void __user *uaddr = (void __user *)(va & PAGE_MASK); /* * This really shouldn't fail, because the page is there * in the page tables. But it might just be unreadable, * in which case we just give up and fill the result with * zeroes. */ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) memset(kaddr, 0, PAGE_SIZE); kunmap_atomic(kaddr, KM_USER0);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -