📄 memory.c

📁 最新最稳定的Linux内存管理模块源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
		flush_dcache_page(dst);
	} else
		copy_user_highpage(dst, src, va, vma);
}

/*
 * do_wp_page - resolve a write fault on a present pte.
 *
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 *
 * We enter with non-exclusive mmap_sem (to exclude vma changes,
 * but allow concurrent faults), with pte both mapped and locked.
 * We return with mmap_sem still held, but pte unmapped and unlocked.
 *
 * Parameters, as they are used below:
 *   mm         - passed to pte_offset_map_lock() when the pte is re-locked,
 *                to mem_cgroup_newpage_charge(), to the rss counters and
 *                to set_pte_at()
 *   vma        - its vm_flags and vm_ops select between reusing the page
 *                in place and copying it
 *   address    - virtual address whose pte is being updated
 *   page_table - the mapped pte for @address (locked on entry, see above)
 *   pmd        - used to map and lock the pte again after it was dropped
 *   ptl        - lock protecting @page_table
 *   orig_pte   - pte value seen by the caller; after every re-lock
 *                *page_table is compared against it before anything
 *                is modified
 *
 * Return value:
 *   VM_FAULT_WRITE  - the pte was updated in place, or switched to a
 *                     freshly copied page
 *   0               - the pte no longer matched @orig_pte after the lock
 *                     had been dropped; nothing was changed
 *   VM_FAULT_OOM    - anon_vma_prepare(), alloc_page_vma() or
 *                     mem_cgroup_newpage_charge() failed
 *   VM_FAULT_SIGBUS - vma->vm_ops->page_mkwrite() returned a negative value
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		spinlock_t *ptl, pte_t orig_pte)
{
	struct page *old_page, *new_page;
	pte_t entry;
	int reuse = 0, ret = 0;
	int page_mkwrite = 0;		/* set once ->page_mkwrite() succeeded */
	struct page *dirty_page = NULL;	/* shared page to dirty-balance on exit */

	old_page = vm_normal_page(vma, address, orig_pte);
	if (!old_page) {
		/*
		 * VM_MIXEDMAP !pfn_valid() case
		 *
		 * We should not cow pages in a shared writeable mapping.
		 * Just mark the pages writable as we can't do any dirty
		 * accounting on raw pfn maps.
		 */
		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
				     (VM_WRITE|VM_SHARED))
			goto reuse;
		/* Copy path with old_page == NULL: no reference was taken. */
		goto gotten;
	}

	/*
	 * Take out anonymous pages first, anonymous shared vmas are
	 * not dirty accountable.
	 */
	if (PageAnon(old_page)) {
		if (!trylock_page(old_page)) {
			/*
			 * The page lock could not be taken immediately: hold
			 * a reference, drop the pte lock, take the page lock
			 * with lock_page(), then re-take the pte lock and
			 * re-check the pte.
			 */
			page_cache_get(old_page);
			pte_unmap_unlock(page_table, ptl);
			lock_page(old_page);
			page_table = pte_offset_map_lock(mm, pmd, address,
							 &ptl);
			if (!pte_same(*page_table, orig_pte)) {
				/* pte changed meanwhile: bail out, ret == 0 */
				unlock_page(old_page);
				page_cache_release(old_page);
				goto unlock;
			}
			page_cache_release(old_page);
		}
		/* reuse_swap_page() decides whether the page is reused in place */
		reuse = reuse_swap_page(old_page);
		unlock_page(old_page);
	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
					(VM_WRITE|VM_SHARED))) {
		/*
		 * Only catch write-faults on shared writable pages,
		 * read-only shared pages can get COWed by
		 * get_user_pages(.write=1, .force=1).
		 */
		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
			/*
			 * Notify the address space that the page is about to
			 * become writable so that it can prohibit this or wait
			 * for the page to get into an appropriate state.
			 *
			 * We do this without the lock held, so that it can
			 * sleep if it needs to.
			 */
			page_cache_get(old_page);
			pte_unmap_unlock(page_table, ptl);

			/*
			 * On failure the pte lock is already dropped; the
			 * unwritable_page exit releases the reference taken
			 * just above.
			 */
			if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
				goto unwritable_page;

			/*
			 * Since we dropped the lock we need to revalidate
			 * the PTE as someone else may have changed it.  If
			 * they did, we just return, as we can count on the
			 * MMU to tell us if they didn't also make it writable.
			 */
			page_table = pte_offset_map_lock(mm, pmd, address,
							 &ptl);
			page_cache_release(old_page);
			/* dirty_page is still NULL here, so unlock just returns */
			if (!pte_same(*page_table, orig_pte))
				goto unlock;

			page_mkwrite = 1;
		}
		/*
		 * Remember the page for the dirty handling at "unlock";
		 * this reference is dropped there by put_page().
		 */
		dirty_page = old_page;
		get_page(dirty_page);
		reuse = 1;
	}

	if (reuse) {
reuse:
		/*
		 * Keep the existing page: build the new pte from orig_pte
		 * via pte_mkyoung(), pte_mkdirty() and maybe_mkwrite().
		 */
		flush_cache_page(vma, address, pte_pfn(orig_pte));
		entry = pte_mkyoung(orig_pte);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		if (ptep_set_access_flags(vma, address, page_table, entry,1))
			update_mmu_cache(vma, address, entry);
		ret |= VM_FAULT_WRITE;
		goto unlock;
	}

	/*
	 * Ok, we need to copy. Oh, well..
	 */
	page_cache_get(old_page);
gotten:
	/* Allocation, copy and cgroup charge are done without the pte lock. */
	pte_unmap_unlock(page_table, ptl);

	if (unlikely(anon_vma_prepare(vma)))
		goto oom;
	VM_BUG_ON(old_page == ZERO_PAGE(0));
	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
	if (!new_page)
		goto oom;
	/*
	 * Don't let another task, with possibly unlocked vma,
	 * keep the mlocked page.
	 */
	if ((vma->vm_flags & VM_LOCKED) && old_page) {
		lock_page(old_page);	/* for LRU manipulation */
		clear_page_mlock(old_page);
		unlock_page(old_page);
	}
	/* old_page may be NULL here (the "goto gotten" case above). */
	cow_user_page(new_page, old_page, address, vma);
	__SetPageUptodate(new_page);

	if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
		goto oom_free_new;

	/*
	 * Re-check the pte - we dropped the lock
	 */
	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
	if (likely(pte_same(*page_table, orig_pte))) {
		/*
		 * rss accounting: a non-anonymous old page moves one count
		 * from file_rss to anon_rss, no old page at all adds one to
		 * anon_rss, an anonymous old page leaves the counters alone.
		 */
		if (old_page) {
			if (!PageAnon(old_page)) {
				dec_mm_counter(mm, file_rss);
				inc_mm_counter(mm, anon_rss);
			}
		} else
			inc_mm_counter(mm, anon_rss);
		flush_cache_page(vma, address, pte_pfn(orig_pte));
		entry = mk_pte(new_page, vma->vm_page_prot);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		/*
		 * Clear the pte entry and flush it first, before updating the
		 * pte with the new entry. This will avoid a race condition
		 * seen in the presence of one thread doing SMC and another
		 * thread doing COW.
		 */
		ptep_clear_flush_notify(vma, address, page_table);
		page_add_new_anon_rmap(new_page, vma, address);
		set_pte_at(mm, address, page_table, entry);
		update_mmu_cache(vma, address, entry);
		if (old_page) {
			/*
			 * Only after switching the pte to the new page may
			 * we remove the mapcount here. Otherwise another
			 * process may come and find the rmap count decremented
			 * before the pte is switched to the new page, and
			 * "reuse" the old page writing into it while our pte
			 * here still points into it and can be read by other
			 * threads.
			 *
			 * The critical issue is to order this
			 * page_remove_rmap with the ptep_clear_flush above.
			 * Those stores are ordered by (if nothing else,)
			 * the barrier present in the atomic_add_negative
			 * in page_remove_rmap.
			 *
			 * Then the TLB flush in ptep_clear_flush ensures that
			 * no process can access the old page before the
			 * decremented mapcount is visible. And the old page
			 * cannot be reused until after the decremented
			 * mapcount is visible. So transitively, TLBs to
			 * old page will be flushed before it can be reused.
			 */
			page_remove_rmap(old_page);
		}

		/*
		 * Free the old page..
		 *
		 * After this assignment both releases below act on old_page
		 * (or on nothing when old_page is NULL) and the new page
		 * keeps its reference.
		 * NOTE(review): presumably one release pairs with the
		 * page_cache_get() before "gotten" and the other with the
		 * reference held on behalf of the replaced pte - confirm.
		 */
		new_page = old_page;
		ret |= VM_FAULT_WRITE;
	} else
		/* pte changed under us: undo the charge, the copy is dropped below */
		mem_cgroup_uncharge_page(new_page);

	if (new_page)
		page_cache_release(new_page);
	if (old_page)
		page_cache_release(old_page);
unlock:
	pte_unmap_unlock(page_table, ptl);
	if (dirty_page) {
		if (vma->vm_file)
			file_update_time(vma->vm_file);

		/*
		 * Yes, Virginia, this is actually required to prevent a race
		 * with clear_page_dirty_for_io() from clearing the page dirty
		 * bit after it clears all dirty ptes, but before a racing
		 * do_wp_page installs a dirty pte.
		 *
		 * do_no_page is protected similarly.
		 */
		wait_on_page_locked(dirty_page);
		set_page_dirty_balance(dirty_page, page_mkwrite);
		put_page(dirty_page);
	}
	return ret;
oom_free_new:
	/* Charge failed: drop the freshly allocated copy, then fall through. */
	page_cache_release(new_page);
oom:
	/* Reached with the pte lock already dropped at "gotten". */
	if (old_page)
		page_cache_release(old_page);
	return VM_FAULT_OOM;

unwritable_page:
	/* Releases the reference taken before calling ->page_mkwrite(). */
	page_cache_release(old_page);
	return VM_FAULT_SIGBUS;
}

/*
 * Helper functions for unmap_mapping_range().
 *
 * __ Notes on dropping i_mmap_lock to reduce latency while unmapping __
 *
 * We have to restart searching the prio_tree whenever we drop the lock,
 * since the iterator is only valid while the lock is held, and anyway
 * a later vma might be split and reinserted earlier while lock dropped.
 *
 * The list of nonlinear vmas could be handled more efficiently, using
 * a placeholder, but handle it in the same way until a need is shown.
* It is important to search the prio_tree before nonlinear list: a vma * may become nonlinear and be shifted from prio_tree to nonlinear list * while the lock is dropped; but never shifted from list to prio_tree. * * In order to make forward progress despite restarting the search, * vm_truncate_count is used to mark a vma as now dealt with, so we can * quickly skip it next time around.  Since the prio_tree search only * shows us those vmas affected by unmapping the range in question, we * can't efficiently keep all vmas in step with mapping->truncate_count: * so instead reset them all whenever it wraps back to 0 (then go to 1). * mapping->truncate_count and vma->vm_truncate_count are protected by * i_mmap_lock. * * In order to make forward progress despite repeatedly restarting some * large vma, note the restart_addr from unmap_vmas when it breaks out: * and restart from that address when we reach that vma again.  It might * have been split or merged, shrunk or extended, but never shifted: so * restart_addr remains valid so long as it remains in the vma's range. * unmap_mapping_range forces truncate_count to leap over page-aligned * values so we can save vma's restart_addr in its truncate_count field. 
*/#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK))static void reset_vma_truncate_counts(struct address_space *mapping){	struct vm_area_struct *vma;	struct prio_tree_iter iter;	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)		vma->vm_truncate_count = 0;	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)		vma->vm_truncate_count = 0;}static int unmap_mapping_range_vma(struct vm_area_struct *vma,		unsigned long start_addr, unsigned long end_addr,		struct zap_details *details){	unsigned long restart_addr;	int need_break;	/*	 * files that support invalidating or truncating portions of the	 * file from under mmaped areas must have their ->fault function	 * return a locked page (and set VM_FAULT_LOCKED in the return).	 * This provides synchronisation against concurrent unmapping here.	 */again:	restart_addr = vma->vm_truncate_count;	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {		start_addr = restart_addr;		if (start_addr >= end_addr) {			/* Top of vma has been split off since last time */			vma->vm_truncate_count = details->truncate_count;			return 0;		}	}	restart_addr = zap_page_range(vma, start_addr,					end_addr - start_addr, details);	need_break = need_resched() || spin_needbreak(details->i_mmap_lock);	if (restart_addr >= end_addr) {		/* We have now completed this vma: mark it so */		vma->vm_truncate_count = details->truncate_count;		if (!need_break)			return 0;	} else {		/* Note restart_addr in vma's truncate_count field */		vma->vm_truncate_count = restart_addr;		if (!need_break)			goto again;	}	spin_unlock(details->i_mmap_lock);	cond_resched();	spin_lock(details->i_mmap_lock);	return -EINTR;}static inline void unmap_mapping_range_tree(struct prio_tree_root *root,					    struct zap_details *details){	struct vm_area_struct *vma;	struct prio_tree_iter iter;	pgoff_t vba, vea, zba, zea;restart:	vma_prio_tree_foreach(vma, &iter, root,			details->first_index, details->last_index) {		/* 
Skip quickly over those we have already dealt with */		if (vma->vm_truncate_count == details->truncate_count)			continue;		vba = vma->vm_pgoff;		vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;		/* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */		zba = details->first_index;		if (zba < vba)			zba = vba;		zea = details->last_index;		if (zea > vea)			zea = vea;		if (unmap_mapping_range_vma(vma,			((zba - vba) << PAGE_SHIFT) + vma->vm_start,			((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start,				details) < 0)			goto restart;	}}static inline void unmap_mapping_range_list(struct list_head *head,					    struct zap_details *details){	struct vm_area_struct *vma;	/*	 * In nonlinear VMAs there is no correspondence between virtual address	 * offset and file offset.  So we must perform an exhaustive search	 * across *all* the pages in each nonlinear VMA, not just the pages	 * whose virtual address lies outside the file truncation point.	 */restart:	list_for_each_entry(vma, head, shared.vm_set.list) {		/* Skip quickly over those we have already dealt with */		if (vma->vm_truncate_count == details->truncate_count)			continue;		details->nonlinear_vma = vma;		if (unmap_mapping_range_vma(vma, vma->vm_start,					vma->vm_end, details) < 0)			goto restart;	}}/** * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file. * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file.  This will be rounded down to a PAGE_SIZE * boundary.  Note that this is different from vmtruncate(), which * must keep the partial page.  In contrast, we must get rid of * partial pages. * @holelen: size of prospective hole in bytes.  This will be rounded * up to a PAGE_SIZE boundary.  A holelen of zero truncates to the * end of the file. 
* @even_cows: 1 when truncating a file, unmap even private COWed pages; * but 0 when invalidating pagecache, don't throw away private data. */void unmap_mapping_range(struct address_space *mapping,		loff_t const holebegin, loff_t const holelen, int even_cows){	struct zap_details details;	pgoff_t hba = holebegin >> PAGE_SHIFT;	pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;	/* Check for overflow. */	if (sizeof(holelen) > sizeof(hlen)) {		long long holeend =			(holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;		if (holeend & ~(long long)ULONG_MAX)			hlen = ULONG_MAX - hba + 1;	}	details.check_mapping = even_cows? NULL: mapping;	details.nonlinear_vma = NULL;	details.first_index = hba;	details.last_index = hba + hlen - 1;	if (details.last_index < details.first_index)		details.last_index = ULONG_MAX;	details.i_mmap_lock = &mapping->i_mmap_lock;	spin_lock(&mapping->i_mmap_lock);	/* Protect against endless unmapping loops */	mapping->truncate_count++;	if (unlikely(is_restart_addr(mapping->truncate_count))) {		if (mapping->truncate_count == 0)			reset_vma_truncate_counts(mapping);		mapping->truncate_count++;	}	details.truncate_count = mapping->truncate_count;	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))		unmap_mapping_range_tree(&mapping->i_mmap, &details);	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);	spin_unlock(&mapping->i_mmap_lock);}EXPORT_SYMBOL(unmap_mapping_range);/** * vmtruncate - unmap mappings "freed" by truncate() syscall * @inode: inode of the file used * @offset: file offset to start truncating * * NOTE! We have to be ready to update the memory sharing * between the file and the memory map for a potential last * incomplete page.  Ugly, but necessary. 
*/int vmtruncate(struct inode * inode, loff_t offset){	if (inode->i_size < offset) {		unsigned long limit;		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;		if (limit != RLIM_INFINITY && offset > limit)			goto out_sig;		if (offset > inode->i_sb->s_maxbytes)			goto out_big;		i_size_write(in
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -