📄 filemap.c

📁 最新最稳定的Linux内存管理模块源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/* *	linux/mm/filemap.c * * Copyright (C) 1994-1999  Linus Torvalds *//* * This file handles the generic file mmap semantics used by * most "normal" filesystems (but you don't /have/ to use this: * the NFS filesystem used to do this differently, for example) */#include <linux/module.h>#include <linux/slab.h>#include <linux/compiler.h>#include <linux/fs.h>#include <linux/uaccess.h>#include <linux/aio.h>#include <linux/capability.h>#include <linux/kernel_stat.h>#include <linux/mm.h>#include <linux/swap.h>#include <linux/mman.h>#include <linux/pagemap.h>#include <linux/file.h>#include <linux/uio.h>#include <linux/hash.h>#include <linux/writeback.h>#include <linux/backing-dev.h>#include <linux/pagevec.h>#include <linux/blkdev.h>#include <linux/security.h>#include <linux/syscalls.h>#include <linux/cpuset.h>#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */#include <linux/memcontrol.h>#include <linux/mm_inline.h> /* for page_is_file_cache() */#include "internal.h"/* * FIXME: remove all knowledge of the buffer layer from the core VM */#include <linux/buffer_head.h> /* for generic_osync_inode */#include <asm/mman.h>/* * Shared mappings implemented 30.11.1994. It's not fully working yet, * though. * * Shared mappings now work. 15.8.1995  Bruno. 
* * finished 'unifying' the page and buffer cache and SMP-threaded the * page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com> * * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de> *//* * Lock ordering: * *  ->i_mmap_lock		(vmtruncate) *    ->private_lock		(__free_pte->__set_page_dirty_buffers) *      ->swap_lock		(exclusive_swap_page, others) *        ->mapping->tree_lock * *  ->i_mutex *    ->i_mmap_lock		(truncate->unmap_mapping_range) * *  ->mmap_sem *    ->i_mmap_lock *      ->page_table_lock or pte_lock	(various, mainly in memory.c) *        ->mapping->tree_lock	(arch-dependent flush_dcache_mmap_lock) * *  ->mmap_sem *    ->lock_page		(access_process_vm) * *  ->i_mutex			(generic_file_buffered_write) *    ->mmap_sem		(fault_in_pages_readable->do_page_fault) * *  ->i_mutex *    ->i_alloc_sem             (various) * *  ->inode_lock *    ->sb_lock			(fs/fs-writeback.c) *    ->mapping->tree_lock	(__sync_single_inode) * *  ->i_mmap_lock *    ->anon_vma.lock		(vma_adjust) * *  ->anon_vma.lock *    ->page_table_lock or pte_lock	(anon_vma_prepare and various) * *  ->page_table_lock or pte_lock *    ->swap_lock		(try_to_unmap_one) *    ->private_lock		(try_to_unmap_one) *    ->tree_lock		(try_to_unmap_one) *    ->zone.lru_lock		(follow_page->mark_page_accessed) *    ->zone.lru_lock		(check_pte_range->isolate_lru_page) *    ->private_lock		(page_remove_rmap->set_page_dirty) *    ->tree_lock		(page_remove_rmap->set_page_dirty) *    ->inode_lock		(page_remove_rmap->set_page_dirty) *    ->inode_lock		(zap_pte_range->set_page_dirty) *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers) * *  ->task->proc_lock *    ->dcache_lock		(proc_pid_lookup) *//* * Remove a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage * is safe.  The caller must hold the mapping's tree_lock. 
*/void __remove_from_page_cache(struct page *page){	struct address_space *mapping = page->mapping;	radix_tree_delete(&mapping->page_tree, page->index);	page->mapping = NULL;	mapping->nrpages--;	__dec_zone_page_state(page, NR_FILE_PAGES);	BUG_ON(page_mapped(page));	mem_cgroup_uncharge_cache_page(page);	/*	 * Some filesystems seem to re-dirty the page even after	 * the VM has canceled the dirty bit (eg ext3 journaling).	 *	 * Fix it up by doing a final dirty accounting check after	 * having removed the page entirely.	 */	if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {		dec_zone_page_state(page, NR_FILE_DIRTY);		dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);	}}void remove_from_page_cache(struct page *page){	struct address_space *mapping = page->mapping;	BUG_ON(!PageLocked(page));	spin_lock_irq(&mapping->tree_lock);	__remove_from_page_cache(page);	spin_unlock_irq(&mapping->tree_lock);}static int sync_page(void *word){	struct address_space *mapping;	struct page *page;	page = container_of((unsigned long *)word, struct page, flags);	/*	 * page_mapping() is being called without PG_locked held.	 * Some knowledge of the state and use of the page is used to	 * reduce the requirements down to a memory barrier.	 * The danger here is of a stale page_mapping() return value	 * indicating a struct address_space different from the one it's	 * associated with when it is associated with one.	 * After smp_mb(), it's either the correct page_mapping() for	 * the page, or an old page_mapping() and the page's own	 * page_mapping() has gone NULL.	 * The ->sync_page() address_space operation must tolerate	 * page_mapping() going NULL. 
By an amazing coincidence,	 * this comes about because none of the users of the page	 * in the ->sync_page() methods make essential use of the	 * page_mapping(), merely passing the page down to the backing	 * device's unplug functions when it's non-NULL, which in turn	 * ignore it for all cases but swap, where only page_private(page) is	 * of interest. When page_mapping() does go NULL, the entire	 * call stack gracefully ignores the page and returns.	 * -- wli	 */	smp_mb();	mapping = page_mapping(page);	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)		mapping->a_ops->sync_page(page);	io_schedule();	return 0;}static int sync_page_killable(void *word){	sync_page(word);	return fatal_signal_pending(current) ? -EINTR : 0;}/** * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range * @mapping:	address space structure to write * @start:	offset in bytes where the range starts * @end:	offset in bytes where the range ends (inclusive) * @sync_mode:	enable synchronous operation * * Start writeback against all of a mapping's dirty pages that lie * within the byte offsets <start, end> inclusive. * * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as * opposed to a regular memory cleansing writeback.  The difference between * these two operations is that if a dirty page/buffer is encountered, it must * be waited upon, and not just skipped over. 
*/int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,				loff_t end, int sync_mode){	int ret;	struct writeback_control wbc = {		.sync_mode = sync_mode,		.nr_to_write = LONG_MAX,		.range_start = start,		.range_end = end,	};	if (!mapping_cap_writeback_dirty(mapping))		return 0;	ret = do_writepages(mapping, &wbc);	return ret;}static inline int __filemap_fdatawrite(struct address_space *mapping,	int sync_mode){	return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);}int filemap_fdatawrite(struct address_space *mapping){	return __filemap_fdatawrite(mapping, WB_SYNC_ALL);}EXPORT_SYMBOL(filemap_fdatawrite);int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,				loff_t end){	return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);}EXPORT_SYMBOL(filemap_fdatawrite_range);/** * filemap_flush - mostly a non-blocking flush * @mapping:	target address_space * * This is a mostly non-blocking flush.  Not suitable for data-integrity * purposes - I/O may not be started against all dirty pages. 
*/int filemap_flush(struct address_space *mapping){	return __filemap_fdatawrite(mapping, WB_SYNC_NONE);}EXPORT_SYMBOL(filemap_flush);/** * wait_on_page_writeback_range - wait for writeback to complete * @mapping:	target address_space * @start:	beginning page index * @end:	ending page index * * Wait for writeback to complete against pages indexed by start->end * inclusive */int wait_on_page_writeback_range(struct address_space *mapping,				pgoff_t start, pgoff_t end){	struct pagevec pvec;	int nr_pages;	int ret = 0;	pgoff_t index;	if (end < start)		return 0;	pagevec_init(&pvec, 0);	index = start;	while ((index <= end) &&			(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,			PAGECACHE_TAG_WRITEBACK,			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {		unsigned i;		for (i = 0; i < nr_pages; i++) {			struct page *page = pvec.pages[i];			/* until radix tree lookup accepts end_index */			if (page->index > end)				continue;			wait_on_page_writeback(page);			if (PageError(page))				ret = -EIO;		}		pagevec_release(&pvec);		cond_resched();	}	/* Check for outstanding write errors */	if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))		ret = -ENOSPC;	if (test_and_clear_bit(AS_EIO, &mapping->flags))		ret = -EIO;	return ret;}/** * sync_page_range - write and wait on all pages in the passed range * @inode:	target inode * @mapping:	target address_space * @pos:	beginning offset in pages to write * @count:	number of bytes to write * * Write and wait upon all the pages in the passed range.  This is a "data * integrity" operation.  It waits upon in-flight writeout before starting and * waiting upon new writeout.  If there was an IO error, return it. * * We need to re-take i_mutex during the generic_osync_inode list walk because * it is otherwise livelockable. 
*/int sync_page_range(struct inode *inode, struct address_space *mapping,			loff_t pos, loff_t count){	pgoff_t start = pos >> PAGE_CACHE_SHIFT;	pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;	int ret;	if (!mapping_cap_writeback_dirty(mapping) || !count)		return 0;	ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);	if (ret == 0) {		mutex_lock(&inode->i_mutex);		ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);		mutex_unlock(&inode->i_mutex);	}	if (ret == 0)		ret = wait_on_page_writeback_range(mapping, start, end);	return ret;}EXPORT_SYMBOL(sync_page_range);/** * sync_page_range_nolock - write & wait on all pages in the passed range without locking * @inode:	target inode * @mapping:	target address_space * @pos:	beginning offset in pages to write * @count:	number of bytes to write * * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea * as it forces O_SYNC writers to different parts of the same file * to be serialised right until io completion. */int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,			   loff_t pos, loff_t count){	pgoff_t start = pos >> PAGE_CACHE_SHIFT;	pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;	int ret;	if (!mapping_cap_writeback_dirty(mapping) || !count)		return 0;	ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);	if (ret == 0)		ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);	if (ret == 0)		ret = wait_on_page_writeback_range(mapping, start, end);	return ret;}EXPORT_SYMBOL(sync_page_range_nolock);/** * filemap_fdatawait - wait for all under-writeback pages to complete * @mapping: address space structure to wait for * * Walk the list of under-writeback pages of the given address space * and wait for all of them. 
*/int filemap_fdatawait(struct address_space *mapping){	loff_t i_size = i_size_read(mapping->host);	if (i_size == 0)		return 0;	return wait_on_page_writeback_range(mapping, 0,				(i_size - 1) >> PAGE_CACHE_SHIFT);}EXPORT_SYMBOL(filemap_fdatawait);int filemap_write_and_wait(struct address_space *mapping){	int err = 0;	if (mapping->nrpages) {		err = filemap_fdatawrite(mapping);		/*		 * Even if the above returned error, the pages may be		 * written partially (e.g. -ENOSPC), so we wait for it.		 * But the -EIO is special case, it may indicate the worst		 * thing (e.g. bug) happened, so we avoid waiting for it.		 */		if (err != -EIO) {			int err2 = filemap_fdatawait(mapping);			if (!err)				err = err2;		}	}	return err;}EXPORT_SYMBOL(filemap_write_and_wait);/** * filemap_write_and_wait_range - write out & wait on a file range * @mapping:	the address_space for the pages * @lstart:	offset in bytes where the range starts * @lend:	offset in bytes where the range ends (inclusive) * * Write out and wait upon file offsets lstart->lend, inclusive. * * Note that `lend' is inclusive (describes the last byte to be written) so * that this function can be used to write to the very end-of-file (end = -1). */int filemap_write_and_wait_range(struct address_space *mapping,				 loff_t lstart, loff_t lend){	int err = 0;	if (mapping->nrpages) {		err = __filemap_fdatawrite_range(mapping, lstart, lend,						 WB_SYNC_ALL);		/* See comment of filemap_write_and_wait() */		if (err != -EIO) {			int err2 = wait_on_page_writeback_range(mapping,						lstart >> PAGE_CACHE_SHIFT,						lend >> PAGE_CACHE_SHIFT);			if (!err)				err = err2;		}	}	return err;}/** * add_to_page_cache_locked - add a locked page to the pagecache * @page:	page to add * @mapping:	the page's address_space * @offset:	page index * @gfp_mask:	page allocation mode * * This function is used to add a page to the pagecache. It must be locked. * This function does not add the page to the LRU.  The caller must do that. 
*/
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
		pgoff_t offset, gfp_t gfp_mask)
{
	int error;

	VM_BUG_ON(!PageLocked(page));

	/* Charge the page to the memory cgroup before touching the tree. */
	error = mem_cgroup_cache_charge(page, current->mm,
					gfp_mask & GFP_RECLAIM_MASK);
	if (error)
		return error;

	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
	if (error) {
		/* Preload failed: undo the charge taken above. */
		mem_cgroup_uncharge_cache_page(page);
		return error;
	}

	/* Take a page reference and bind the page before inserting it. */
	page_cache_get(page);
	page->mapping = mapping;
	page->index = offset;

	spin_lock_irq(&mapping->tree_lock);
	error = radix_tree_insert(&mapping->page_tree, offset, page);
	if (likely(!error)) {
		mapping->nrpages++;
		__inc_zone_page_state(page, NR_FILE_PAGES);
	} else {
		/* Insert failed: roll back binding, charge and reference. */
		page->mapping = NULL;
		mem_cgroup_uncharge_cache_page(page);
		page_cache_release(page);
	}
	spin_unlock_irq(&mapping->tree_lock);

	radix_tree_preload_end();
	return error;
}
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -