📄 page-writeback.c
/*
 * mm/page-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level.
 *
 * 10Apr2002	Andrew Morton
 *		Initial version
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>

/*
 * The maximum number of pages to writeout in a single bdflush/kupdate
 * operation.  We do this so we don't hold I_SYNC against an inode for
 * enormous amounts of time, which would block a userspace task which has
 * been forced to throttle against that inode.  Also, the code reevaluates
 * the dirty each time it has written this many pages.
 */
#define MAX_WRITEBACK_PAGES	1024

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/*
 * When balance_dirty_pages decides that the caller needs to perform some
 * non-background writeback, this is how many pages it will attempt to write.
 * It should be somewhat larger than RATELIMIT_PAGES to ensure that reasonably
 * large amounts of I/O are submitted.
 */
static inline long sync_writeback_pages(void)
{
	return ratelimit_pages + ratelimit_pages / 2;
}

/* The following parameters are exported via /proc/sys/vm */

/*
 * Start background writeback (via pdflush) at this percentage
 */
int dirty_background_ratio = 5;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory
 */
unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage
 */
int vm_dirty_ratio = 10;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks, in jiffies
 */
int dirty_writeback_interval = 5 * HZ;

/*
 * The longest number of jiffies for which data is allowed to remain dirty
 */
int dirty_expire_interval = 30 * HZ;

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
 * a full sync is triggered after this time elapses without any disk activity.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */

static void background_writeout(unsigned long _min_pages);
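/*
 * [Editor's note: illustrative example, not part of the original source.
 *  The numbers assume 4 KiB pages and roughly 4 GiB (1,048,576 pages) of
 *  dirtyable memory.]
 *
 * With the defaults above (dirty_background_ratio = 5, vm_dirty_ratio = 10),
 * background writeback via pdflush would start at about 52,428 dirty pages
 * (~205 MiB) and direct throttling of writers at about 104,857 pages
 * (~410 MiB).  Writing a non-zero value to dirty_background_bytes or
 * vm_dirty_bytes overrides the corresponding ratio; the sysctl handlers
 * further below enforce this by zeroing the other member of each pair.
 */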
/*
 * Scale the writeback cache size proportional to the relative writeout speeds.
 *
 * We do this by keeping a floating proportion between BDIs, based on page
 * writeback completions [end_page_writeback()]. Those devices that write out
 * pages fastest will get the larger share, while the slower will get a smaller
 * share.
 *
 * We use page writeout completions because we are interested in getting rid of
 * dirty pages. Having them written out is the primary goal.
 *
 * We introduce a concept of time, a period over which we measure these events,
 * because demand can/will vary over time. The length of this period itself is
 * measured in page writeback completions.
 *
 */
static struct prop_descriptor vm_completions;
static struct prop_descriptor vm_dirties;

/*
 * couple the period to the dirty_ratio:
 *
 *   period/2 ~ roundup_pow_of_two(dirty limit)
 */
static int calc_period_shift(void)
{
	unsigned long dirty_total;

	if (vm_dirty_bytes)
		dirty_total = vm_dirty_bytes / PAGE_SIZE;
	else
		dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
				100;
	return 2 + ilog2(dirty_total - 1);
}

/*
 * update the period when the dirty threshold changes.
 */
static void update_completion_period(void)
{
	int shift = calc_period_shift();
	prop_change_shift(&vm_completions, shift);
	prop_change_shift(&vm_dirties, shift);
}
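/*
 * [Editor's note: illustrative example, not part of the original source.]
 *
 * With vm_dirty_bytes == 0, vm_dirty_ratio == 10 and
 * determine_dirtyable_memory() returning 1,048,576 pages, dirty_total is
 * 104,857 pages and ilog2(104,856) == 16, so calc_period_shift() returns 18.
 * The floating-proportion period is then on the order of 2^18 = 262,144
 * writeback completions, i.e. period/2 = 131,072 =
 * roundup_pow_of_two(104,857), matching the relation stated above.
 */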
int dirty_background_ratio_handler(struct ctl_table *table, int write,
		struct file *filp, void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_bytes = 0;
	return ret;
}

int dirty_background_bytes_handler(struct ctl_table *table, int write,
		struct file *filp, void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_ratio = 0;
	return ret;
}

int dirty_ratio_handler(struct ctl_table *table, int write,
		struct file *filp, void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int old_ratio = vm_dirty_ratio;
	int ret;

	ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
		update_completion_period();
		vm_dirty_bytes = 0;
	}
	return ret;
}

int dirty_bytes_handler(struct ctl_table *table, int write,
		struct file *filp, void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	unsigned long old_bytes = vm_dirty_bytes;
	int ret;

	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
		update_completion_period();
		vm_dirty_ratio = 0;
	}
	return ret;
}

/*
 * Increment the BDI's writeout completion count and the global writeout
 * completion count. Called from test_clear_page_writeback().
 */
static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
{
	__prop_inc_percpu_max(&vm_completions, &bdi->completions,
			      bdi->max_prop_frac);
}

void bdi_writeout_inc(struct backing_dev_info *bdi)
{
	unsigned long flags;

	local_irq_save(flags);
	__bdi_writeout_inc(bdi);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);

void task_dirty_inc(struct task_struct *tsk)
{
	prop_inc_single(&vm_dirties, &tsk->dirties);
}

/*
 * Obtain an accurate fraction of the BDI's portion.
 */
static void bdi_writeout_fraction(struct backing_dev_info *bdi,
		long *numerator, long *denominator)
{
	if (bdi_cap_writeback_dirty(bdi)) {
		prop_fraction_percpu(&vm_completions, &bdi->completions,
				numerator, denominator);
	} else {
		*numerator = 0;
		*denominator = 1;
	}
}

/*
 * Clip the earned share of dirty pages to that which is actually available.
 * This avoids exceeding the total dirty_limit when the floating averages
 * fluctuate too quickly.
 */
static void
clip_bdi_dirty_limit(struct backing_dev_info *bdi, long dirty, long *pbdi_dirty)
{
	long avail_dirty;

	avail_dirty = dirty -
		(global_page_state(NR_FILE_DIRTY) +
		 global_page_state(NR_WRITEBACK) +
		 global_page_state(NR_UNSTABLE_NFS) +
		 global_page_state(NR_WRITEBACK_TEMP));

	if (avail_dirty < 0)
		avail_dirty = 0;

	avail_dirty += bdi_stat(bdi, BDI_RECLAIMABLE) +
		bdi_stat(bdi, BDI_WRITEBACK);

	*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
}

static inline void task_dirties_fraction(struct task_struct *tsk,
		long *numerator, long *denominator)
{
	prop_fraction_single(&vm_dirties, &tsk->dirties,
				numerator, denominator);
}

/*
 * scale the dirty limit
 *
 * task specific dirty limit:
 *
 *   dirty -= (dirty/8) * p_{t}
 */
static void task_dirty_limit(struct task_struct *tsk, long *pdirty)
{
	long numerator, denominator;
	long dirty = *pdirty;
	u64 inv = dirty >> 3;

	task_dirties_fraction(tsk, &numerator, &denominator);
	inv *= numerator;
	do_div(inv, denominator);

	dirty -= inv;
	if (dirty < *pdirty/2)
		dirty = *pdirty/2;

	*pdirty = dirty;
}
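/*
 * [Editor's note: illustrative example, not part of the original source.]
 *
 * If *pdirty is 100,000 pages and task_dirties_fraction() reports that this
 * task produced 1/4 of the recently dirtied pages, then
 * inv = (100,000 >> 3) * 1/4 = 3,125 and the task-specific limit becomes
 * 96,875 pages.  A task responsible for all recent dirtying loses the full
 * dirty/8 (12,500 pages), and the limit is never pushed below *pdirty/2.
 */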
/*
 *
 */
static DEFINE_SPINLOCK(bdi_lock);
static unsigned int bdi_min_ratio;

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&bdi_lock, flags);
	if (min_ratio > bdi->max_ratio) {
		ret = -EINVAL;
	} else {
		min_ratio -= bdi->min_ratio;
		if (bdi_min_ratio + min_ratio < 100) {
			bdi_min_ratio += min_ratio;
			bdi->min_ratio += min_ratio;
		} else {
			ret = -EINVAL;
		}
	}
	spin_unlock_irqrestore(&bdi_lock, flags);

	return ret;
}

int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
{
	unsigned long flags;
	int ret = 0;

	if (max_ratio > 100)
		return -EINVAL;

	spin_lock_irqsave(&bdi_lock, flags);
	if (bdi->min_ratio > max_ratio) {
		ret = -EINVAL;
	} else {
		bdi->max_ratio = max_ratio;
		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
	}
	spin_unlock_irqrestore(&bdi_lock, flags);

	return ret;
}
EXPORT_SYMBOL(bdi_set_max_ratio);

/*
 * Work out the current dirty-memory clamping and background writeout
 * thresholds.
 *
 * The main aim here is to lower them aggressively if there is a lot of mapped
 * memory around.  To avoid stressing page reclaim with lots of unreclaimable
 * pages.  It is better to clamp down on writers than to start swapping, and
 * performing lots of scanning.
 *
 * We only allow 1/2 of the currently-unmapped memory to be dirtied.
 *
 * We don't permit the clamping level to fall below 5% - that is getting rather
 * excessive.
 *
 * We make sure that the background writeout level is below the adjusted
 * clamping level.
 */
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;

	for_each_node_state(node, N_HIGH_MEMORY) {
		struct zone *z =
			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];

		x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
	}
	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the number of the total dirtyable memory. This can only
	 * occur in very strange VM situations but we want to make sure
	 * that this does not occur.
	 */
	return min(x, total);
#else
	return 0;
#endif
}

/**
 * determine_dirtyable_memory - amount of memory that may be used
 *
 * Returns the number of pages that can currently be freed and used
 * by the kernel for direct mappings.
 */
unsigned long determine_dirtyable_memory(void)
{
	unsigned long x;

	x = global_page_state(NR_FREE_PAGES) + global_lru_pages();

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;	/* Ensure that we never return 0 */
}

void
get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
		 unsigned long *pbdi_dirty, struct backing_dev_info *bdi)
{
	unsigned long background;
	unsigned long dirty;
	unsigned long available_memory = determine_dirtyable_memory();
	struct task_struct *tsk;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
	else {
		int dirty_ratio;

		dirty_ratio = vm_dirty_ratio;
		if (dirty_ratio < 5)
			dirty_ratio = 5;
		dirty = (dirty_ratio * available_memory) / 100;
	}

	if (dirty_background_bytes)
		background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
	else
		background = (dirty_background_ratio * available_memory) / 100;

	if (background >= dirty)
		background = dirty / 2;
	tsk = current;
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		background += background / 4;
		dirty += dirty / 4;
	}
	*pbackground = background;
	*pdirty = dirty;

	if (bdi) {
		u64 bdi_dirty;
		long numerator, denominator;

		/*
		 * Calculate this BDI's share of the dirty ratio.
		 */
		bdi_writeout_fraction(bdi, &numerator, &denominator);

		bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100;
		bdi_dirty *= numerator;
		do_div(bdi_dirty, denominator);
		bdi_dirty += (dirty * bdi->min_ratio) / 100;
		if (bdi_dirty > (dirty * bdi->max_ratio) / 100)
			bdi_dirty = dirty * bdi->max_ratio / 100;

		*pbdi_dirty = bdi_dirty;
		clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);
		task_dirty_limit(current, pbdi_dirty);
	}
}
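/*
 * [Editor's note: illustrative example, not part of the original source.]
 *
 * With dirty = 100,000 pages, bdi_min_ratio == 0, bdi->min_ratio == 0,
 * bdi->max_ratio == 100 and a device whose completion fraction is 3/10,
 * bdi_dirty = (100,000 * 100/100) * 3/10 = 30,000 pages, well under the
 * max_ratio cap of 100,000.  clip_bdi_dirty_limit() then clips that against
 * the dirty headroom actually left globally, and task_dirty_limit() may
 * lower it further for tasks that dirty pages heavily.
 */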