linux/mm/vmscan.c
/*
 *  linux/mm/vmscan.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95, Stephen Tweedie.
 *  kswapd added: 7.1.96  sct
 *  Removed kswapd_ctl limits, and swap out as many pages as needed
 *  to bring the system back to freepages.high: 2.4.97, Rik van Riel.
 *  Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
 *  Multiqueue VM started 5.8.00, Rik van Riel.
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/vmstat.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>	/* for try_to_release_page(),
					buffer_heads_over_limit */
#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/notifier.h>
#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
#include <linux/sysctl.h>

#include <asm/tlbflush.h>
#include <asm/div64.h>

#include <linux/swapops.h>

#include "internal.h"

struct scan_control {
	/* Incremented by the number of inactive pages that were scanned */
	unsigned long nr_scanned;

	/* Number of pages freed so far during a call to shrink_zones() */
	unsigned long nr_reclaimed;

	/* This context's GFP mask */
	gfp_t gfp_mask;

	int may_writepage;

	/* Can pages be swapped as part of reclaim? */
	int may_swap;

	/* This context's SWAP_CLUSTER_MAX. If freeing memory for
	 * suspend, we effectively ignore SWAP_CLUSTER_MAX.
	 * In this context, it doesn't matter that we scan the
	 * whole list at once. */
	int swap_cluster_max;

	int swappiness;

	int all_unreclaimable;

	int order;

	/* Which cgroup do we reclaim from */
	struct mem_cgroup *mem_cgroup;

	/* Pluggable isolate pages callback */
	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
			unsigned long *scanned, int order, int mode,
			struct zone *z, struct mem_cgroup *mem_cont,
			int active, int file);
};

#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != _base) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&(_page->lru));		\
			prefetch(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
#endif

#ifdef ARCH_HAS_PREFETCHW
#define prefetchw_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != _base) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&(_page->lru));		\
			prefetchw(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
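lru_to_page() picks the entry at the tail of a list; on an LRU that is the oldest page, since new pages are linked in at the head. The sketch below is an illustration and is not part of vmscan.c (example_drain_oldest_first() is a made-up name); it only shows the tail-first pattern that reclaim loops built on this macro use.

/* Illustration only: consume a private list of pages oldest-first. */
static void example_drain_oldest_first(struct list_head *page_list)
{
	while (!list_empty(page_list)) {
		struct page *page = lru_to_page(page_list);	/* tail entry */

		list_del(&page->lru);
		/* ... inspect or free 'page' here ... */
	}
}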
/*
 * From 0 .. 100.  Higher means more swappy.
 */
int vm_swappiness = 60;
long vm_total_pages;	/* The total number of pages which the VM controls */

static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);

#ifdef CONFIG_CGROUP_MEM_RES_CTLR
#define scanning_global_lru(sc)	(!(sc)->mem_cgroup)
#else
#define scanning_global_lru(sc)	(1)
#endif

static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
						  struct scan_control *sc)
{
	if (!scanning_global_lru(sc))
		return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);

	return &zone->reclaim_stat;
}

static unsigned long zone_nr_pages(struct zone *zone, struct scan_control *sc,
				   enum lru_list lru)
{
	if (!scanning_global_lru(sc))
		return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);

	return zone_page_state(zone, NR_LRU_BASE + lru);
}

/*
 * Add a shrinker callback to be called from the vm
 */
void register_shrinker(struct shrinker *shrinker)
{
	shrinker->nr = 0;
	down_write(&shrinker_rwsem);
	list_add_tail(&shrinker->list, &shrinker_list);
	up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(register_shrinker);

/*
 * Remove one
 */
void unregister_shrinker(struct shrinker *shrinker)
{
	down_write(&shrinker_rwsem);
	list_del(&shrinker->list);
	up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(unregister_shrinker);

#define SHRINK_BATCH 128
/*
 * Call the shrink functions to age shrinkable caches
 *
 * Here we assume it costs one seek to replace a lru page and that it also
 * takes a seek to recreate a cache object.  With this in mind we age equal
 * percentages of the lru and ageable caches.  This should balance the seeks
 * generated by these structures.
 *
 * If the vm encountered mapped pages on the LRU it increases the pressure on
 * slab to avoid swapping.
 *
 * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
 *
 * `lru_pages' represents the number of on-LRU pages in all the zones which
 * are eligible for the caller's allocation attempt.  It is used for balancing
 * slab reclaim versus page reclaim.
 *
 * Returns the number of slab objects which we shrunk.
 */
unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
			unsigned long lru_pages)
{
	struct shrinker *shrinker;
	unsigned long ret = 0;

	if (scanned == 0)
		scanned = SWAP_CLUSTER_MAX;

	if (!down_read_trylock(&shrinker_rwsem))
		return 1;	/* Assume we'll be able to shrink next time */

	list_for_each_entry(shrinker, &shrinker_list, list) {
		unsigned long long delta;
		unsigned long total_scan;
		unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask);

		delta = (4 * scanned) / shrinker->seeks;
		delta *= max_pass;
		do_div(delta, lru_pages + 1);
		shrinker->nr += delta;
		if (shrinker->nr < 0) {
			printk(KERN_ERR "%s: nr=%ld\n",
					__func__, shrinker->nr);
			shrinker->nr = max_pass;
		}

		/*
		 * Avoid risking looping forever due to too large nr value:
		 * never try to free more than twice the estimated number of
		 * freeable entries.
		 */
		if (shrinker->nr > max_pass * 2)
			shrinker->nr = max_pass * 2;

		total_scan = shrinker->nr;
		shrinker->nr = 0;

		while (total_scan >= SHRINK_BATCH) {
			long this_scan = SHRINK_BATCH;
			int shrink_ret;
			int nr_before;

			nr_before = (*shrinker->shrink)(0, gfp_mask);
			shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask);
			if (shrink_ret == -1)
				break;
			if (shrink_ret < nr_before)
				ret += nr_before - shrink_ret;
			count_vm_events(SLABS_SCANNED, this_scan);
			total_scan -= this_scan;

			cond_resched();
		}
		shrinker->nr += total_scan;
	}
	up_read(&shrinker_rwsem);
	return ret;
}
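shrink_slab() above sizes each cache's scan target as delta = (4 * scanned / seeks) * max_pass / (lru_pages + 1), so a cache is aged roughly in proportion to how hard the LRU lists were scanned. For example, with scanned = 1000 pages, seeks = 2, max_pass = 500 objects and lru_pages = 100000, delta comes to about ten objects; that is below SHRINK_BATCH (128), so nothing is scanned on this pass and the credit is carried forward in shrinker->nr. The sketch below is a minimal, hypothetical client of the register_shrinker() API using the two-argument ->shrink callback invoked above; the my_cache_* names are invented and the bare counter stands in for a real cache's locked bookkeeping. Returning -1 when __GFP_FS is missing matches how shrink_slab() reacts to -1: it stops scanning that cache for this pass.

#include <linux/mm.h>	/* struct shrinker, register_shrinker(), DEFAULT_SEEKS */

static int my_cache_objects;		/* pretend object count */

static int my_cache_shrink(int nr_to_scan, gfp_t gfp_mask)
{
	if (nr_to_scan) {
		/* Refuse work we cannot do in this allocation context. */
		if (!(gfp_mask & __GFP_FS))
			return -1;
		/* Evict up to nr_to_scan objects (illustration only). */
		my_cache_objects -= min(nr_to_scan, my_cache_objects);
	}
	/* With nr_to_scan == 0, shrink_slab() only asks for the count. */
	return my_cache_objects;
}

static struct shrinker my_cache_shrinker = {
	.shrink	= my_cache_shrink,
	.seeks	= DEFAULT_SEEKS,	/* relative cost of recreating one object */
};

/* register_shrinker(&my_cache_shrinker) at init time,
 * unregister_shrinker(&my_cache_shrinker) before the cache goes away. */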
/* Called without lock on whether page is mapped, so answer is unstable */
static inline int page_mapping_inuse(struct page *page)
{
	struct address_space *mapping;

	/* Page is in somebody's page tables. */
	if (page_mapped(page))
		return 1;

	/* Be more reluctant to reclaim swapcache than pagecache */
	if (PageSwapCache(page))
		return 1;

	mapping = page_mapping(page);
	if (!mapping)
		return 0;

	/* File is mmap'd by somebody? */
	return mapping_mapped(mapping);
}

static inline int is_page_cache_freeable(struct page *page)
{
	return page_count(page) - !!PagePrivate(page) == 2;
}

static int may_write_to_queue(struct backing_dev_info *bdi)
{
	if (current->flags & PF_SWAPWRITE)
		return 1;
	if (!bdi_write_congested(bdi))
		return 1;
	if (bdi == current->backing_dev_info)
		return 1;
	return 0;
}

/*
 * We detected a synchronous write error writing a page out.  Probably
 * -ENOSPC.  We need to propagate that into the address_space for a subsequent
 * fsync(), msync() or close().
 *
 * The tricky part is that after writepage we cannot touch the mapping: nothing
 * prevents it from being freed up.  But we have a ref on the page and once
 * that page is locked, the mapping is pinned.
 *
 * We're allowed to run sleeping lock_page() here because we know the caller
 * has __GFP_FS.
 */
static void handle_write_error(struct address_space *mapping,
				struct page *page, int error)
{
	lock_page(page);
	if (page_mapping(page) == mapping)
		mapping_set_error(mapping, error);
	unlock_page(page);
}

/* Request for sync pageout. */
enum pageout_io {
	PAGEOUT_IO_ASYNC,
	PAGEOUT_IO_SYNC,
};

/* possible outcome of pageout() */
typedef enum {
	/* failed to write page out, page is locked */
	PAGE_KEEP,
	/* move page to the active list, page is locked */
	PAGE_ACTIVATE,
	/* page has been sent to the disk successfully, page is unlocked */
	PAGE_SUCCESS,
	/* page is clean and locked */
	PAGE_CLEAN,
} pageout_t;
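The pageout_t values above also encode who holds the page lock afterwards. The helper below is purely illustrative (example_dispose() is not a real kernel function); it restates that contract as the dispatch a caller such as shrink_page_list(), which is outside this excerpt, has to perform.

/* Illustration only: a caller's view of the pageout_t contract above. */
static void example_dispose(struct page *page, pageout_t result)
{
	switch (result) {
	case PAGE_KEEP:
		/* write failed, page still locked: unlock and keep it */
		break;
	case PAGE_ACTIVATE:
		/* page still locked: mark it active so it goes back on
		 * the active list instead of being scanned again soon */
		SetPageActive(page);
		break;
	case PAGE_SUCCESS:
		/* I/O was submitted and the page came back unlocked */
		break;
	case PAGE_CLEAN:
		/* nothing to write: page is locked, clean, maybe freeable */
		break;
	}
}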
/*
 * pageout is called by shrink_page_list() for each dirty page.
 * Calls ->writepage().
 */
static pageout_t pageout(struct page *page, struct address_space *mapping,
						enum pageout_io sync_writeback)
{
	/*
	 * If the page is dirty, only perform writeback if that write
	 * will be non-blocking, to prevent this allocation from being
	 * stalled by pagecache activity.  But note that there may be
	 * stalls if we need to run get_block().  We could test
	 * PagePrivate for that.
	 *
	 * If this process is currently in generic_file_write() against
	 * this page's queue, we can perform writeback even if that
	 * will block.
	 *
	 * If the page is swapcache, write it back even if that would
	 * block, for some throttling.  This happens by accident, because
	 * swap_backing_dev_info is bust: it doesn't reflect the
	 * congestion state of the swapdevs.  Easy to fix, if needed.
	 * See swapfile.c:page_queue_congested().
	 */
	if (!is_page_cache_freeable(page))
		return PAGE_KEEP;
	if (!mapping) {
		/*
		 * Some data journaling orphaned pages can have
		 * page->mapping == NULL while being dirty with clean buffers.
		 */
		if (PagePrivate(page)) {
			if (try_to_free_buffers(page)) {
				ClearPageDirty(page);
				printk("%s: orphaned page\n", __func__);
				return PAGE_CLEAN;
			}
		}
		return PAGE_KEEP;
	}
	if (mapping->a_ops->writepage == NULL)
		return PAGE_ACTIVATE;
	if (!may_write_to_queue(mapping->backing_dev_info))
		return PAGE_KEEP;

	if (clear_page_dirty_for_io(page)) {
		int res;
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_NONE,
			.nr_to_write = SWAP_CLUSTER_MAX,
			.range_start = 0,
			.range_end = LLONG_MAX,
			.nonblocking = 1,
			.for_reclaim = 1,
		};

		SetPageReclaim(page);
		res = mapping->a_ops->writepage(page, &wbc);
		if (res < 0)
			handle_write_error(mapping, page, res);
		if (res == AOP_WRITEPAGE_ACTIVATE) {
			ClearPageReclaim(page);
			return PAGE_ACTIVATE;
		}

		/*
		 * Wait on writeback if requested to.  This happens when
		 * direct reclaiming a large contiguous area and the
		 * first attempt to free a range of pages fails.
		 */
		if (PageWriteback(page) &&
		    sync_writeback == PAGEOUT_IO_SYNC)
			wait_on_page_writeback(page);

		if (!PageWriteback(page)) {
			/* synchronous write or broken a_ops? */
			ClearPageReclaim(page);
		}
		inc_zone_page_state(page, NR_VMSCAN_WRITE);
		return PAGE_SUCCESS;
	}

	return PAGE_CLEAN;
}

/*
 * Same as remove_mapping, but if the page is removed from the mapping, it
 * gets returned with a refcount of 0.
 */
static int __remove_mapping(struct address_space *mapping, struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(mapping != page_mapping(page));

	spin_lock_irq(&mapping->tree_lock);
	/*
	 * The non-racy check for a busy page.
	 *
	 * Must be careful with the order of the tests.  When someone has
	 * a ref to the page, it may be possible that they dirty it then
	 * drop the reference.  So if PageDirty is tested before page_count
	 * here, then the following race may occur:
	 *
	 * get_user_pages(&page);
	 * [user mapping goes away]
	 * write_to(page);
	 *				!PageDirty(page)    [good]
	 * SetPageDirty(page);
	 * put_page(page);
	 *				!page_count(page)   [good, discard it]
	 *
	 * [oops, our write_to data is lost]
	 *
	 * Reversing the order of the tests ensures such a situation cannot
	 * escape unnoticed.  The smp_rmb is needed to ensure the page->flags
	 * load is not satisfied before that of page->_count.
	 *
	 * Note that if SetPageDirty is always performed via set_page_dirty,
	 * and thus under tree_lock, then this ordering is not required.
	 */
	if (!page_freeze_refs(page, 2))
		goto cannot_free;
	/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
	if (unlikely(PageDirty(page))) {
		page_unfreeze_refs(page, 2);
		goto cannot_free;
	}

	if (PageSwapCache(page)) {
		swp_entry_t swap = { .val = page_private(page) };
		__delete_from_swap_cache(page);
		spin_unlock_irq(&mapping->tree_lock);
		swap_free(swap);
	} else {
		__remove_from_page_cache(page);
		spin_unlock_irq(&mapping->tree_lock);
	}

	return 1;

cannot_free:
	spin_unlock_irq(&mapping->tree_lock);
	return 0;
}
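The comment on __remove_mapping() refers to remove_mapping(), which lies outside this excerpt. Below is a hedged sketch of what such a wrapper looks like, reconstructed only from the stated contract (on success the page comes back with its refcount frozen at 0, so the caller's single reference must be restored); treat it as an illustration, not the file's actual code.

/* Illustrative sketch of a remove_mapping()-style wrapper. */
static int example_remove_mapping(struct address_space *mapping,
				  struct page *page)
{
	if (__remove_mapping(mapping, page)) {
		/* The pagecache/swapcache reference is gone; unfreeze
		 * straight to 1 so only the caller's reference remains. */
		page_unfreeze_refs(page, 1);
		return 1;
	}
	return 0;
}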