memory.c
 * And for normal mappings this is false.
 *
 * This restricts such mappings to be a linear translation from virtual address
 * to pfn. To get around this restriction, we allow arbitrary mappings so long
 * as the vma is not a COW mapping; in that case, we know that all ptes are
 * special (because none can have been COWed).
 *
 *
 * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP.
 *
 * VM_MIXEDMAP mappings can likewise contain memory with or without "struct
 * page" backing, however the difference is that _all_ pages with a struct
 * page (that is, those where pfn_valid is true) are refcounted and considered
 * normal pages by the VM. The disadvantage is that pages are refcounted
 * (which can be slower and simply not an option for some PFNMAP users). The
 * advantage is that we don't have to follow the strict linearity rule of
 * PFNMAP mappings in order to support COWable mappings.
 *
 */
#ifdef __HAVE_ARCH_PTE_SPECIAL
# define HAVE_PTE_SPECIAL 1
#else
# define HAVE_PTE_SPECIAL 0
#endif

struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
                            pte_t pte)
{
        unsigned long pfn = pte_pfn(pte);

        if (HAVE_PTE_SPECIAL) {
                if (likely(!pte_special(pte)))
                        goto check_pfn;
                if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)))
                        print_bad_pte(vma, addr, pte, NULL);
                return NULL;
        }

        /* !HAVE_PTE_SPECIAL case follows: */

        if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
                if (vma->vm_flags & VM_MIXEDMAP) {
                        if (!pfn_valid(pfn))
                                return NULL;
                        goto out;
                } else {
                        unsigned long off;
                        off = (addr - vma->vm_start) >> PAGE_SHIFT;
                        if (pfn == vma->vm_pgoff + off)
                                return NULL;
                        if (!is_cow_mapping(vma->vm_flags))
                                return NULL;
                }
        }

check_pfn:
        if (unlikely(pfn > highest_memmap_pfn)) {
                print_bad_pte(vma, addr, pte, NULL);
                return NULL;
        }

        /*
         * NOTE! We still have PageReserved() pages in the page tables.
         * eg. VDSO mappings can cause them to exist.
         */
out:
        return pfn_to_page(pfn);
}

/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
 */

static inline void
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
             pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
             unsigned long addr, int *rss)
{
        unsigned long vm_flags = vma->vm_flags;
        pte_t pte = *src_pte;
        struct page *page;

        /* pte contains position in swap or file, so copy. */
        if (unlikely(!pte_present(pte))) {
                if (!pte_file(pte)) {
                        swp_entry_t entry = pte_to_swp_entry(pte);

                        swap_duplicate(entry);
                        /* make sure dst_mm is on swapoff's mmlist. */
                        if (unlikely(list_empty(&dst_mm->mmlist))) {
                                spin_lock(&mmlist_lock);
                                if (list_empty(&dst_mm->mmlist))
                                        list_add(&dst_mm->mmlist, &src_mm->mmlist);
                                spin_unlock(&mmlist_lock);
                        }
                        if (is_write_migration_entry(entry) &&
                                        is_cow_mapping(vm_flags)) {
                                /*
                                 * COW mappings require pages in both parent
                                 * and child to be set to read.
                                 */
                                make_migration_entry_read(&entry);
                                pte = swp_entry_to_pte(entry);
                                set_pte_at(src_mm, addr, src_pte, pte);
                        }
                }
                goto out_set_pte;
        }

        /*
         * If it's a COW mapping, write protect it both
         * in the parent and the child
         */
        if (is_cow_mapping(vm_flags)) {
                ptep_set_wrprotect(src_mm, addr, src_pte);
                pte = pte_wrprotect(pte);
        }

        /*
         * If it's a shared mapping, mark it clean in
         * the child
         */
        if (vm_flags & VM_SHARED)
                pte = pte_mkclean(pte);
        pte = pte_mkold(pte);

        page = vm_normal_page(vma, addr, pte);
        if (page) {
                get_page(page);
                page_dup_rmap(page, vma, addr);
                rss[!!PageAnon(page)]++;
        }

out_set_pte:
        set_pte_at(dst_mm, addr, dst_pte, pte);
}

static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
{
        pte_t *src_pte, *dst_pte;
        spinlock_t *src_ptl, *dst_ptl;
        int progress = 0;
        int rss[2];

again:
        rss[1] = rss[0] = 0;
        dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
        if (!dst_pte)
                return -ENOMEM;
        src_pte = pte_offset_map_nested(src_pmd, addr);
        src_ptl = pte_lockptr(src_mm, src_pmd);
        spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
        arch_enter_lazy_mmu_mode();

        do {
                /*
                 * We are holding two locks at this point - either of them
                 * could generate latencies in another task on another CPU.
                 */
                if (progress >= 32) {
                        progress = 0;
                        if (need_resched() ||
                            spin_needbreak(src_ptl) || spin_needbreak(dst_ptl))
                                break;
                }
                if (pte_none(*src_pte)) {
                        progress++;
                        continue;
                }
                copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
                progress += 8;
        } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);

        arch_leave_lazy_mmu_mode();
        spin_unlock(src_ptl);
        pte_unmap_nested(src_pte - 1);
        add_mm_rss(dst_mm, rss[0], rss[1]);
        pte_unmap_unlock(dst_pte - 1, dst_ptl);
        cond_resched();
        if (addr != end)
                goto again;
        return 0;
}

static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
{
        pmd_t *src_pmd, *dst_pmd;
        unsigned long next;

        dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
        if (!dst_pmd)
                return -ENOMEM;
        src_pmd = pmd_offset(src_pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none_or_clear_bad(src_pmd))
                        continue;
                if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
                                                vma, addr, next))
                        return -ENOMEM;
        } while (dst_pmd++, src_pmd++, addr = next, addr != end);
        return 0;
}

static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
{
        pud_t *src_pud, *dst_pud;
        unsigned long next;

        dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
        if (!dst_pud)
                return -ENOMEM;
        src_pud = pud_offset(src_pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(src_pud))
                        continue;
                if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
                                                vma, addr, next))
                        return -ENOMEM;
        } while (dst_pud++, src_pud++, addr = next, addr != end);
        return 0;
}

int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                struct vm_area_struct *vma)
{
        pgd_t *src_pgd, *dst_pgd;
        unsigned long next;
        unsigned long addr = vma->vm_start;
        unsigned long end = vma->vm_end;
        int ret;

        /*
         * Don't copy ptes where a page fault will fill them correctly.
         * Fork becomes much lighter when there are big shared or private
         * readonly mappings. The tradeoff is that copy_page_range is more
         * efficient than faulting.
         */
        if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
                if (!vma->anon_vma)
                        return 0;
        }

        if (is_vm_hugetlb_page(vma))
                return copy_hugetlb_page_range(dst_mm, src_mm, vma);

        if (unlikely(is_pfn_mapping(vma))) {
                /*
                 * We do not free on error cases below as remove_vma
                 * gets called on error from higher level routine
                 */
                ret = track_pfn_vma_copy(vma);
                if (ret)
                        return ret;
        }

        /*
         * We need to invalidate the secondary MMU mappings only when
         * there could be a permission downgrade on the ptes of the
         * parent mm. And a permission downgrade will only happen if
         * is_cow_mapping() returns true.
         */
        if (is_cow_mapping(vma->vm_flags))
                mmu_notifier_invalidate_range_start(src_mm, addr, end);

        ret = 0;
        dst_pgd = pgd_offset(dst_mm, addr);
        src_pgd = pgd_offset(src_mm, addr);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(src_pgd))
                        continue;
                if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
                                            vma, addr, next))) {
                        ret = -ENOMEM;
                        break;
                }
        } while (dst_pgd++, src_pgd++, addr = next, addr != end);

        if (is_cow_mapping(vma->vm_flags))
                mmu_notifier_invalidate_range_end(src_mm,
                                                  vma->vm_start, end);
        return ret;
}

static unsigned long zap_pte_range(struct mmu_gather *tlb,
                                struct vm_area_struct *vma, pmd_t *pmd,
                                unsigned long addr, unsigned long end,
                                long *zap_work, struct zap_details *details)
{
        struct mm_struct *mm = tlb->mm;
        pte_t *pte;
        spinlock_t *ptl;
        int file_rss = 0;
        int anon_rss = 0;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        arch_enter_lazy_mmu_mode();
        do {
                pte_t ptent = *pte;
                if (pte_none(ptent)) {
                        (*zap_work)--;
                        continue;
                }

                (*zap_work) -= PAGE_SIZE;

                if (pte_present(ptent)) {
                        struct page *page;

                        page = vm_normal_page(vma, addr, ptent);
                        if (unlikely(details) && page) {
                                /*
                                 * unmap_shared_mapping_pages() wants to
                                 * invalidate cache without truncating:
                                 * unmap shared but keep private pages.
                                 */
                                if (details->check_mapping &&
                                    details->check_mapping != page->mapping)
                                        continue;
                                /*
                                 * Each page->index must be checked when
                                 * invalidating or truncating nonlinear.
                                 */
                                if (details->nonlinear_vma &&
                                    (page->index < details->first_index ||
                                     page->index > details->last_index))
                                        continue;
                        }
                        ptent = ptep_get_and_clear_full(mm, addr, pte,
                                                        tlb->fullmm);
                        tlb_remove_tlb_entry(tlb, pte, addr);
                        if (unlikely(!page))
                                continue;
                        if (unlikely(details) && details->nonlinear_vma &&
                            linear_page_index(details->nonlinear_vma,
                                              addr) != page->index)
                                set_pte_at(mm, addr, pte,
                                           pgoff_to_pte(page->index));
                        if (PageAnon(page))
                                anon_rss--;
                        else {
                                if (pte_dirty(ptent))
                                        set_page_dirty(page);
                                if (pte_young(ptent) &&
                                    likely(!VM_SequentialReadHint(vma)))
                                        mark_page_accessed(page);
                                file_rss--;
                        }
                        page_remove_rmap(page);
                        if (unlikely(page_mapcount(page) < 0))
                                print_bad_pte(vma, addr, ptent, page);
                        tlb_remove_page(tlb, page);
                        continue;
                }
                /*
                 * If details->check_mapping, we leave swap entries;
                 * if details->nonlinear_vma, we leave file entries.
                 */
                if (unlikely(details))
                        continue;
                if (pte_file(ptent)) {
                        if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
                                print_bad_pte(vma, addr, ptent, NULL);
                } else if (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent))))
                        print_bad_pte(vma, addr, ptent, NULL);
                pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
        } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));

        add_mm_rss(mm, file_rss, anon_rss);
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);

        return addr;
}

static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
                                struct vm_area_struct *vma, pud_t *pud,
                                unsigned long addr, unsigned long end,
                                long *zap_work, struct zap_details *details)
{
        pmd_t *pmd;
        unsigned long next;

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none_or_clear_bad(pmd)) {
                        (*zap_work)--;
                        continue;
                }
                next = zap_pte_range(tlb, vma, pmd, addr, next,
                                                zap_work, details);
        } while (pmd++, addr = next, (addr != end && *zap_work > 0));

        return addr;
}

static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
                                struct vm_area_struct *vma, pgd_t *pgd,
                                unsigned long addr, unsigned long end,
                                long *zap_work, struct zap_details *details)
{
        pud_t *pud;
        unsigned long next;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud)) {
                        (*zap_work)--;
                        continue;
                }
                next = zap_pmd_range(tlb, vma, pud, addr, next,
                                                zap_work, details);
        } while (pud++, addr = next, (addr != end && *zap_work > 0));

        return addr;
}

static unsigned long unmap_page_range(struct mmu_gather *tlb,
                                struct vm_area_struct *vma,
                                unsigned long addr, unsigned long end,
                                long *zap_work, struct zap_details *details)
{
        pgd_t *pgd;
        unsigned long next;

        if (details && !details->check_mapping && !details->nonlinear_vma)
                details = NULL;

        BUG_ON(addr >= end);
        tlb_start_vma(tlb, vma);
        pgd = pgd_offset(vma->vm_mm, addr);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd)) {
                        (*zap_work)--;
                        continue;
                }
                next = zap_pud_range(tlb, vma, pgd, addr, next,
                                                zap_work, details);
        } while (pgd++, addr = next, (addr != end && *zap_work > 0));
        tlb_end_vma(tlb, vma);

        return addr;
}

#ifdef CONFIG_PREEMPT
# define ZAP_BLOCK_SIZE (8 * PAGE_SIZE)
#else
/* No preempt: go for improved straight-line efficiency */
# define ZAP_BLOCK_SIZE (1024 * PAGE_SIZE)
#endif

/**
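 *
 * A minimal sketch (not part of this file) of how copy_page_range() above is
 * reached: during fork(), dup_mmap() in kernel/fork.c walks the parent's vma
 * list and copies the page tables of each duplicated vma. The helper below is
 * hypothetical and omits dup_mmap()'s vma duplication and error handling.
 */
static int example_copy_all_page_tables(struct mm_struct *dst_mm,
                                        struct mm_struct *src_mm)
{
        struct vm_area_struct *vma;
        int ret = 0;

        /* Illustrative walk only: copy the ptes backing every vma of src_mm. */
        for (vma = src_mm->mmap; vma && !ret; vma = vma->vm_next)
                ret = copy_page_range(dst_mm, src_mm, vma);

        return ret;
}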