📄 memory.c

📁 最新最稳定的Linux内存管理模块源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
	struct mm_struct *mm = vma->vm_mm;	int retval;	pte_t *pte;	spinlock_t *ptl;	retval = -EINVAL;	if (PageAnon(page))		goto out;	retval = -ENOMEM;	flush_dcache_page(page);	pte = get_locked_pte(mm, addr, &ptl);	if (!pte)		goto out;	retval = -EBUSY;	if (!pte_none(*pte))		goto out_unlock;	/* Ok, finally just insert the thing.. */	get_page(page);	inc_mm_counter(mm, file_rss);	page_add_file_rmap(page);	set_pte_at(mm, addr, pte, mk_pte(page, prot));	retval = 0;	pte_unmap_unlock(pte, ptl);	return retval;out_unlock:	pte_unmap_unlock(pte, ptl);out:	return retval;}/** * vm_insert_page - insert single page into user vma * @vma: user vma to map to * @addr: target user address of this page * @page: source kernel page * * This allows drivers to insert individual pages they've allocated * into a user vma. * * The page has to be a nice clean _individual_ kernel allocation. * If you allocate a compound page, you need to have marked it as * such (__GFP_COMP), or manually just split the page up yourself * (see split_page()). * * NOTE! Traditionally this was done with "remap_pfn_range()" which * took an arbitrary page protection parameter. This doesn't allow * that. Your vma protection will have to be set up correctly, which * means that if you want a shared writable mapping, you'd better * ask for a shared writable mapping! * * The page does not need to be reserved. 
*/int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,			struct page *page){	if (addr < vma->vm_start || addr >= vma->vm_end)		return -EFAULT;	if (!page_count(page))		return -EINVAL;	vma->vm_flags |= VM_INSERTPAGE;	return insert_page(vma, addr, page, vma->vm_page_prot);}EXPORT_SYMBOL(vm_insert_page);static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,			unsigned long pfn, pgprot_t prot){	struct mm_struct *mm = vma->vm_mm;	int retval;	pte_t *pte, entry;	spinlock_t *ptl;	retval = -ENOMEM;	pte = get_locked_pte(mm, addr, &ptl);	if (!pte)		goto out;	retval = -EBUSY;	if (!pte_none(*pte))		goto out_unlock;	/* Ok, finally just insert the thing.. */	entry = pte_mkspecial(pfn_pte(pfn, prot));	set_pte_at(mm, addr, pte, entry);	update_mmu_cache(vma, addr, entry); /* XXX: why not for insert_page? */	retval = 0;out_unlock:	pte_unmap_unlock(pte, ptl);out:	return retval;}/** * vm_insert_pfn - insert single pfn into user vma * @vma: user vma to map to * @addr: target user address of this page * @pfn: source kernel pfn * * Similar to vm_inert_page, this allows drivers to insert individual pages * they've allocated into a user vma. Same comments apply. * * This function should only be called from a vm_ops->fault handler, and * in that case the handler should return NULL. * * vma cannot be a COW mapping. * * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. */int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,			unsigned long pfn){	int ret;	pgprot_t pgprot = vma->vm_page_prot;	/*	 * Technically, architectures with pte_special can avoid all these	 * restrictions (same for remap_pfn_range).  However we would like	 * consistency in testing and feature parity among all, so we should	 * try to keep these invariants in place for everybody.	 
*/	BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));	BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==						(VM_PFNMAP|VM_MIXEDMAP));	BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));	BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));	if (addr < vma->vm_start || addr >= vma->vm_end)		return -EFAULT;	if (track_pfn_vma_new(vma, &pgprot, pfn, PAGE_SIZE))		return -EINVAL;	ret = insert_pfn(vma, addr, pfn, pgprot);	if (ret)		untrack_pfn_vma(vma, pfn, PAGE_SIZE);	return ret;}EXPORT_SYMBOL(vm_insert_pfn);int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,			unsigned long pfn){	BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));	if (addr < vma->vm_start || addr >= vma->vm_end)		return -EFAULT;	/*	 * If we don't have pte special, then we have to use the pfn_valid()	 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*	 * refcount the page if pfn_valid is true (hence insert_page rather	 * than insert_pfn).	 */	if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {		struct page *page;		page = pfn_to_page(pfn);		return insert_page(vma, addr, page, vma->vm_page_prot);	}	return insert_pfn(vma, addr, pfn, vma->vm_page_prot);}EXPORT_SYMBOL(vm_insert_mixed);/* * maps a range of physical memory into the requested pages. the old * mappings are removed. 
any references to nonexistent pages results * in null mappings (currently treated as "copy-on-access") */static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,			unsigned long addr, unsigned long end,			unsigned long pfn, pgprot_t prot){	pte_t *pte;	spinlock_t *ptl;	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);	if (!pte)		return -ENOMEM;	arch_enter_lazy_mmu_mode();	do {		BUG_ON(!pte_none(*pte));		set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));		pfn++;	} while (pte++, addr += PAGE_SIZE, addr != end);	arch_leave_lazy_mmu_mode();	pte_unmap_unlock(pte - 1, ptl);	return 0;}static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,			unsigned long addr, unsigned long end,			unsigned long pfn, pgprot_t prot){	pmd_t *pmd;	unsigned long next;	pfn -= addr >> PAGE_SHIFT;	pmd = pmd_alloc(mm, pud, addr);	if (!pmd)		return -ENOMEM;	do {		next = pmd_addr_end(addr, end);		if (remap_pte_range(mm, pmd, addr, next,				pfn + (addr >> PAGE_SHIFT), prot))			return -ENOMEM;	} while (pmd++, addr = next, addr != end);	return 0;}static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,			unsigned long addr, unsigned long end,			unsigned long pfn, pgprot_t prot){	pud_t *pud;	unsigned long next;	pfn -= addr >> PAGE_SHIFT;	pud = pud_alloc(mm, pgd, addr);	if (!pud)		return -ENOMEM;	do {		next = pud_addr_end(addr, end);		if (remap_pmd_range(mm, pud, addr, next,				pfn + (addr >> PAGE_SHIFT), prot))			return -ENOMEM;	} while (pud++, addr = next, addr != end);	return 0;}/** * remap_pfn_range - remap kernel memory to userspace * @vma: user vma to map to * @addr: target user address to start at * @pfn: physical address of kernel memory * @size: size of map area * @prot: page protection flags for this mapping * *  Note: this is only safe if the mm semaphore is held when called. 
*/int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,		    unsigned long pfn, unsigned long size, pgprot_t prot){	pgd_t *pgd;	unsigned long next;	unsigned long end = addr + PAGE_ALIGN(size);	struct mm_struct *mm = vma->vm_mm;	int err;	/*	 * Physically remapped pages are special. Tell the	 * rest of the world about it:	 *   VM_IO tells people not to look at these pages	 *	(accesses can have side effects).	 *   VM_RESERVED is specified all over the place, because	 *	in 2.4 it kept swapout's vma scan off this vma; but	 *	in 2.6 the LRU scan won't even find its pages, so this	 *	flag means no more than count its pages in reserved_vm,	 * 	and omit it from core dump, even when VM_IO turned off.	 *   VM_PFNMAP tells the core MM that the base pages are just	 *	raw PFN mappings, and do not have a "struct page" associated	 *	with them.	 *	 * There's a horrible special case to handle copy-on-write	 * behaviour that some programs depend on. We mark the "original"	 * un-COW'ed pages by matching them up with "vma->vm_pgoff".	 
*/	if (addr == vma->vm_start && end == vma->vm_end)		vma->vm_pgoff = pfn;	else if (is_cow_mapping(vma->vm_flags))		return -EINVAL;	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;	err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size));	if (err) {		/*		 * To indicate that track_pfn related cleanup is not		 * needed from higher level routine calling unmap_vmas		 */		vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP);		return -EINVAL;	}	BUG_ON(addr >= end);	pfn -= addr >> PAGE_SHIFT;	pgd = pgd_offset(mm, addr);	flush_cache_range(vma, addr, end);	do {		next = pgd_addr_end(addr, end);		err = remap_pud_range(mm, pgd, addr, next,				pfn + (addr >> PAGE_SHIFT), prot);		if (err)			break;	} while (pgd++, addr = next, addr != end);	if (err)		untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));	return err;}EXPORT_SYMBOL(remap_pfn_range);static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,				     unsigned long addr, unsigned long end,				     pte_fn_t fn, void *data){	pte_t *pte;	int err;	pgtable_t token;	spinlock_t *uninitialized_var(ptl);	pte = (mm == &init_mm) ?		
pte_alloc_kernel(pmd, addr) :		pte_alloc_map_lock(mm, pmd, addr, &ptl);	if (!pte)		return -ENOMEM;	BUG_ON(pmd_huge(*pmd));	arch_enter_lazy_mmu_mode();	token = pmd_pgtable(*pmd);	do {		err = fn(pte, token, addr, data);		if (err)			break;	} while (pte++, addr += PAGE_SIZE, addr != end);	arch_leave_lazy_mmu_mode();	if (mm != &init_mm)		pte_unmap_unlock(pte-1, ptl);	return err;}static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,				     unsigned long addr, unsigned long end,				     pte_fn_t fn, void *data){	pmd_t *pmd;	unsigned long next;	int err;	BUG_ON(pud_huge(*pud));	pmd = pmd_alloc(mm, pud, addr);	if (!pmd)		return -ENOMEM;	do {		next = pmd_addr_end(addr, end);		err = apply_to_pte_range(mm, pmd, addr, next, fn, data);		if (err)			break;	} while (pmd++, addr = next, addr != end);	return err;}static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,				     unsigned long addr, unsigned long end,				     pte_fn_t fn, void *data){	pud_t *pud;	unsigned long next;	int err;	pud = pud_alloc(mm, pgd, addr);	if (!pud)		return -ENOMEM;	do {		next = pud_addr_end(addr, end);		err = apply_to_pmd_range(mm, pud, addr, next, fn, data);		if (err)			break;	} while (pud++, addr = next, addr != end);	return err;}/* * Scan a region of virtual memory, filling in page tables as necessary * and calling a provided function on each leaf page table. 
*/int apply_to_page_range(struct mm_struct *mm, unsigned long addr,			unsigned long size, pte_fn_t fn, void *data){	pgd_t *pgd;	unsigned long next;	unsigned long start = addr, end = addr + size;	int err;	BUG_ON(addr >= end);	mmu_notifier_invalidate_range_start(mm, start, end);	pgd = pgd_offset(mm, addr);	do {		next = pgd_addr_end(addr, end);		err = apply_to_pud_range(mm, pgd, addr, next, fn, data);		if (err)			break;	} while (pgd++, addr = next, addr != end);	mmu_notifier_invalidate_range_end(mm, start, end);	return err;}EXPORT_SYMBOL_GPL(apply_to_page_range);/* * handle_pte_fault chooses page fault handler according to an entry * which was read non-atomically.  Before making any commitment, on * those architectures or configurations (e.g. i386 with PAE) which * might give a mix of unmatched parts, do_swap_page and do_file_page * must check under lock before unmapping the pte and proceeding * (but do_wp_page is only called after already making such a check; * and do_anonymous_page and do_no_page can safely check later on). */static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,				pte_t *page_table, pte_t orig_pte){	int same = 1;#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)	if (sizeof(pte_t) > sizeof(unsigned long)) {		spinlock_t *ptl = pte_lockptr(mm, pmd);		spin_lock(ptl);		same = pte_same(*page_table, orig_pte);		spin_unlock(ptl);	}#endif	pte_unmap(page_table);	return same;}/* * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when * servicing faults for write access.  In the normal case, do always want * pte_mkwrite.  But get_user_pages can cause write faults for mappings * that do not have writing enabled, when used by access_process_vm. 
*/
static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
	/* Apply pte_mkwrite() only when the vma has VM_WRITE set. */
	if (likely(vma->vm_flags & VM_WRITE))
		pte = pte_mkwrite(pte);
	/* Otherwise the pte is handed back unchanged. */
	return pte;
}

/*
 * NOTE(review): only the beginning of cow_user_page() is visible here; the
 * part shown handles a NULL @src by filling @dst from the user address @va.
 */
static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
{
	/*
	 * If the source page was a PFN mapping, we don't have
	 * a "struct page" for it. We do a best-effort copy by
	 * just copying from the original user address. If that
	 * fails, we just zero-fill it. Live with it.
	 */
	if (unlikely(!src)) {
		void *kaddr = kmap_atomic(dst, KM_USER0);
		/* Round va down to the start of its page. */
		void __user *uaddr = (void __user *)(va & PAGE_MASK);

		/*
		 * This really shouldn't fail, because the page is there
		 * in the page tables. But it might just be unreadable,
		 * in which case we just give up and fill the result with
		 * zeroes.
		 */
		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
			memset(kaddr, 0, PAGE_SIZE);
		kunmap_atomic(kaddr, KM_USER0);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -