/* 📄 swapfile.c — page header left over from a web code viewer ("字号" = "font size") */
/*
 *  linux/mm/swapfile.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/namei.h>
#include <linux/shm.h>
#include <linux/blkdev.h>
#include <linux/random.h>
#include <linux/writeback.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rmap.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/mutex.h>
#include <linux/capability.h>
#include <linux/syscalls.h>
#include <linux/memcontrol.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <linux/swapops.h>
#include <linux/page_cgroup.h>

/* Protects swap_info[], swap_list and the per-device allocation state. */
static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
long nr_swap_pages;		/* free swap pages across all devices */
long total_swap_pages;
static int swap_overflow;
static int least_priority;

/* Messages for the bad-entry diagnostics printed by swap_info_get(). */
static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
static const char Unused_offset[] = "Unused swap offset entry ";

static struct swap_list_t swap_list = {-1, -1};

static struct swap_info_struct swap_info[MAX_SWAPFILES];

static DEFINE_MUTEX(swapon_mutex);

/*
 * We need this because the bdev->unplug_fn can sleep and we cannot
 * hold swap_lock while calling the unplug_fn. And swap_lock
 * cannot be turned into a mutex.
 */
static DECLARE_RWSEM(swap_unplug_sem);

/*
 * Unplug the block queue backing @page's swap device, so queued swap I/O
 * for it gets issued.  Taken under swap_unplug_sem (read side) so swapoff
 * can exclude us while it tears the device down.
 */
void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
{
	swp_entry_t entry;

	down_read(&swap_unplug_sem);
	entry.val = page_private(page);
	if (PageSwapCache(page)) {
		struct block_device *bdev = swap_info[swp_type(entry)].bdev;
		struct backing_dev_info *bdi;

		/*
		 * If the page is removed from swapcache from under us (with a
		 * racy try_to_unuse/swapoff) we need an additional reference
		 * count to avoid reading garbage from page_private(page) above.
		 * If the WARN_ON triggers during a swapoff it maybe the race
		 * condition and it's harmless. However if it triggers without
		 * swapoff it signals a problem.
		 */
		WARN_ON(page_count(page) <= 1);

		bdi = bdev->bd_inode->i_mapping->backing_dev_info;
		blk_run_backing_dev(bdi, page);
	}
	up_read(&swap_unplug_sem);
}

/*
 * swapon tell device that all the old swap contents can be discarded,
 * to allow the swap device to optimize its wear-levelling.
 *
 * Walks every extent of @si and issues a block discard for it, skipping
 * the first page of the first extent (the swap header must survive).
 * Returns 0 or the first error from blkdev_issue_discard() — often
 * -EOPNOTSUPP when the device has no discard support.
 */
static int discard_swap(struct swap_info_struct *si)
{
	struct swap_extent *se;
	int err = 0;

	list_for_each_entry(se, &si->extent_list, list) {
		/* PAGE_SHIFT - 9: convert page counts to 512-byte sectors */
		sector_t start_block = se->start_block << (PAGE_SHIFT - 9);
		sector_t nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);

		if (se->start_page == 0) {
			/* Do not discard the swap header page! */
			start_block += 1 << (PAGE_SHIFT - 9);
			nr_blocks -= 1 << (PAGE_SHIFT - 9);
			if (!nr_blocks)
				continue;
		}

		err = blkdev_issue_discard(si->bdev, start_block,
						nr_blocks, GFP_KERNEL);
		if (err)
			break;

		cond_resched();
	}
	return err;		/* That will often be -EOPNOTSUPP */
}

/*
 * swap allocation tell device that a cluster of swap can now be discarded,
 * to allow the swap device to optimize its wear-levelling.
 */
static void discard_swap_cluster(struct swap_info_struct *si,
				 pgoff_t start_page, pgoff_t nr_pages)
{
	struct swap_extent *se = si->curr_swap_extent;
	int found_extent = 0;

	while (nr_pages) {
		struct list_head *lh;

		if (se->start_page <= start_page &&
		    start_page < se->start_page + se->nr_pages) {
			pgoff_t offset = start_page - se->start_page;
			sector_t start_block = se->start_block + offset;
			sector_t nr_blocks = se->nr_pages - offset;

			/* The range may span several extents: clamp to this one */
			if (nr_blocks > nr_pages)
				nr_blocks = nr_pages;
			start_page += nr_blocks;
			nr_pages -= nr_blocks;

			/* Cache the first matching extent for the next lookup */
			if (!found_extent++)
				si->curr_swap_extent = se;

			/* Pages -> 512-byte sectors */
			start_block <<= PAGE_SHIFT - 9;
			nr_blocks <<= PAGE_SHIFT - 9;
			if (blkdev_issue_discard(si->bdev, start_block,
							nr_blocks, GFP_NOIO))
				break;
		}

		/* Advance circularly, skipping the list head */
		lh = se->list.next;
		if (lh == &si->extent_list)
			lh = lh->next;
		se = list_entry(lh, struct swap_extent, list);
	}
}

/* wait_on_bit() action: just reschedule until SWP_DISCARDING clears. */
static int wait_for_discard(void *word)
{
	schedule();
	return 0;
}

#define SWAPFILE_CLUSTER	256
#define LATENCY_LIMIT		256

/*
 * Find and claim one free swap slot on @si, returning its page offset,
 * or 0 on failure.  Called with swap_lock held; may drop and retake it
 * while scanning for a free cluster or issuing discards (SWP_SCANNING
 * is set in si->flags for the duration so swapoff can tell we're here).
 */
static inline unsigned long scan_swap_map(struct swap_info_struct *si)
{
	unsigned long offset;
	unsigned long scan_base;
	unsigned long last_in_cluster = 0;
	int latency_ration = LATENCY_LIMIT;
	int found_free_cluster = 0;

	/*
	 * We try to cluster swap pages by allocating them sequentially
	 * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this
	 * way, however, we resort to first-free allocation, starting
	 * a new cluster.  This prevents us from scattering swap pages
	 * all over the entire swap partition, so that we reduce
	 * overall disk seek times between swap pages.  -- sct
	 * But we do now try to find an empty cluster.  -Andrea
	 * And we let swap pages go all over an SSD partition.  Hugh
	 */

	si->flags += SWP_SCANNING;
	scan_base = offset = si->cluster_next;

	/* Cluster exhausted: look for a fresh empty cluster */
	if (unlikely(!si->cluster_nr--)) {
		if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
			si->cluster_nr = SWAPFILE_CLUSTER - 1;
			goto checks;
		}
		if (si->flags & SWP_DISCARDABLE) {
			/*
			 * Start range check on racing allocations, in case
			 * they overlap the cluster we eventually decide on
			 * (we scan without swap_lock to allow preemption).
			 * It's hardly conceivable that cluster_nr could be
			 * wrapped during our scan, but don't depend on it.
			 */
			if (si->lowest_alloc)
				goto checks;
			si->lowest_alloc = si->max;
			si->highest_alloc = 0;
		}
		spin_unlock(&swap_lock);

		/*
		 * If seek is expensive, start searching for new cluster from
		 * start of partition, to minimize the span of allocated swap.
		 * But if seek is cheap, search from our current position, so
		 * that swap is allocated from all over the partition: if the
		 * Flash Translation Layer only remaps within limited zones,
		 * we don't want to wear out the first zone too quickly.
		 */
		if (!(si->flags & SWP_SOLIDSTATE))
			scan_base = offset = si->lowest_bit;
		last_in_cluster = offset + SWAPFILE_CLUSTER - 1;

		/* Locate the first empty (unaligned) cluster */
		for (; last_in_cluster <= si->highest_bit; offset++) {
			if (si->swap_map[offset])
				last_in_cluster = offset + SWAPFILE_CLUSTER;
			else if (offset == last_in_cluster) {
				spin_lock(&swap_lock);
				offset -= SWAPFILE_CLUSTER - 1;
				si->cluster_next = offset;
				si->cluster_nr = SWAPFILE_CLUSTER - 1;
				found_free_cluster = 1;
				goto checks;
			}
			if (unlikely(--latency_ration < 0)) {
				cond_resched();
				latency_ration = LATENCY_LIMIT;
			}
		}

		/* Wrap around: retry from lowest_bit up to where we started */
		offset = si->lowest_bit;
		last_in_cluster = offset + SWAPFILE_CLUSTER - 1;

		/* Locate the first empty (unaligned) cluster */
		for (; last_in_cluster < scan_base; offset++) {
			if (si->swap_map[offset])
				last_in_cluster = offset + SWAPFILE_CLUSTER;
			else if (offset == last_in_cluster) {
				spin_lock(&swap_lock);
				offset -= SWAPFILE_CLUSTER - 1;
				si->cluster_next = offset;
				si->cluster_nr = SWAPFILE_CLUSTER - 1;
				found_free_cluster = 1;
				goto checks;
			}
			if (unlikely(--latency_ration < 0)) {
				cond_resched();
				latency_ration = LATENCY_LIMIT;
			}
		}

		/* No empty cluster found: fall back to single-slot scan */
		offset = scan_base;
		spin_lock(&swap_lock);
		si->cluster_nr = SWAPFILE_CLUSTER - 1;
		si->lowest_alloc = 0;
	}

checks:
	if (!(si->flags & SWP_WRITEOK))
		goto no_page;
	if (!si->highest_bit)
		goto no_page;
	if (offset > si->highest_bit)
		scan_base = offset = si->lowest_bit;
	if (si->swap_map[offset])
		goto scan;

	/* Claim the slot and maintain the free-range bookkeeping */
	if (offset == si->lowest_bit)
		si->lowest_bit++;
	if (offset == si->highest_bit)
		si->highest_bit--;
	si->inuse_pages++;
	if (si->inuse_pages == si->pages) {
		si->lowest_bit = si->max;
		si->highest_bit = 0;
	}
	si->swap_map[offset] = 1;
	si->cluster_next = offset + 1;
	si->flags -= SWP_SCANNING;

	if (si->lowest_alloc) {
		/*
		 * Only set when SWP_DISCARDABLE, and there's a scan
		 * for a free cluster in progress or just completed.
		 */
		if (found_free_cluster) {
			/*
			 * To optimize wear-levelling, discard the
			 * old data of the cluster, taking care not to
			 * discard any of its pages that have already
			 * been allocated by racing tasks (offset has
			 * already stepped over any at the beginning).
			 */
			if (offset < si->highest_alloc &&
			    si->lowest_alloc <= last_in_cluster)
				last_in_cluster = si->lowest_alloc - 1;
			si->flags |= SWP_DISCARDING;
			spin_unlock(&swap_lock);

			if (offset < last_in_cluster)
				discard_swap_cluster(si, offset,
					last_in_cluster - offset + 1);

			spin_lock(&swap_lock);
			si->lowest_alloc = 0;
			si->flags &= ~SWP_DISCARDING;

			smp_mb();	/* wake_up_bit advises this */
			wake_up_bit(&si->flags, ilog2(SWP_DISCARDING));

		} else if (si->flags & SWP_DISCARDING) {
			/*
			 * Delay using pages allocated by racing tasks
			 * until the whole discard has been issued.  We
			 * could defer that delay until swap_writepage,
			 * but it's easier to keep this self-contained.
			 */
			spin_unlock(&swap_lock);
			wait_on_bit(&si->flags, ilog2(SWP_DISCARDING),
				wait_for_discard, TASK_UNINTERRUPTIBLE);
			spin_lock(&swap_lock);
		} else {
			/*
			 * Note pages allocated by racing tasks while
			 * scan for a free cluster is in progress, so
			 * that its final discard can exclude them.
			 */
			if (offset < si->lowest_alloc)
				si->lowest_alloc = offset;
			if (offset > si->highest_alloc)
				si->highest_alloc = offset;
		}
	}
	return offset;

scan:
	/* Linear search for any free slot, lock dropped for latency */
	spin_unlock(&swap_lock);
	while (++offset <= si->highest_bit) {
		if (!si->swap_map[offset]) {
			spin_lock(&swap_lock);
			goto checks;
		}
		if (unlikely(--latency_ration < 0)) {
			cond_resched();
			latency_ration = LATENCY_LIMIT;
		}
	}
	offset = si->lowest_bit;
	while (++offset < scan_base) {
		if (!si->swap_map[offset]) {
			spin_lock(&swap_lock);
			goto checks;
		}
		if (unlikely(--latency_ration < 0)) {
			cond_resched();
			latency_ration = LATENCY_LIMIT;
		}
	}
	spin_lock(&swap_lock);

no_page:
	si->flags -= SWP_SCANNING;
	return 0;
}

/*
 * Allocate a swap entry from the highest-priority device with free
 * space, rotating among devices of equal priority.  Returns the entry,
 * or (swp_entry_t){0} when no swap is available.
 */
swp_entry_t get_swap_page(void)
{
	struct swap_info_struct *si;
	pgoff_t offset;
	int type, next;
	int wrapped = 0;

	spin_lock(&swap_lock);
	if (nr_swap_pages <= 0)
		goto noswap;
	nr_swap_pages--;

	for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
		si = swap_info + type;
		next = si->next;
		if (next < 0 ||
		    (!wrapped && si->prio != swap_info[next].prio)) {
			next = swap_list.head;
			wrapped++;
		}

		if (!si->highest_bit)
			continue;
		if (!(si->flags & SWP_WRITEOK))
			continue;

		swap_list.next = next;
		offset = scan_swap_map(si);
		if (offset) {
			spin_unlock(&swap_lock);
			return swp_entry(type, offset);
		}
		/* scan_swap_map may have dropped the lock: re-read next */
		next = swap_list.next;
	}

	/* Nothing allocated: give back the page we reserved above */
	nr_swap_pages++;
noswap:
	spin_unlock(&swap_lock);
	return (swp_entry_t) {0};
}

/*
 * Allocate a swap entry from the specific device @type (used e.g. by
 * callers that must target one device).  Returns (swp_entry_t){0} on
 * failure.
 */
swp_entry_t get_swap_page_of_type(int type)
{
	struct swap_info_struct *si;
	pgoff_t offset;

	spin_lock(&swap_lock);
	si = swap_info + type;
	if (si->flags & SWP_WRITEOK) {
		nr_swap_pages--;
		offset = scan_swap_map(si);
		if (offset) {
			spin_unlock(&swap_lock);
			return swp_entry(type, offset);
		}
		nr_swap_pages++;
	}
	spin_unlock(&swap_lock);
	return (swp_entry_t) {0};
}

static struct
swap_info_struct * swap_info_get(swp_entry_t entry)
{
	struct swap_info_struct * p;
	unsigned long offset, type;

	if (!entry.val)
		goto out;
	type = swp_type(entry);
	if (type >= nr_swapfiles)
		goto bad_nofile;
	p = & swap_info[type];
	if (!(p->flags & SWP_USED))
		goto bad_device;
	offset = swp_offset(entry);
	if (offset >= p->max)
		goto bad_offset;
	if (!p->swap_map[offset])
		goto bad_free;
	/* NOTE: returns with swap_lock held on success; caller unlocks */
	spin_lock(&swap_lock);
	return p;

bad_free:
	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
	goto out;
bad_offset:
	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
	goto out;
bad_device:
	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
	goto out;
bad_nofile:
	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
out:
	return NULL;
}

/*
 * Drop one reference on the swap entry @ent of device @p, freeing the
 * slot when the count reaches zero.  Called with swap_lock held.
 * Returns the new map count.  Counts pinned at SWAP_MAP_MAX are never
 * decremented here.
 */
static int swap_entry_free(struct swap_info_struct *p, swp_entry_t ent)
{
	unsigned long offset = swp_offset(ent);
	int count = p->swap_map[offset];

	if (count < SWAP_MAP_MAX) {
		count--;
		p->swap_map[offset] = count;
		if (!count) {
			/* Widen the free range and make this device
			 * eligible again if its priority warrants it. */
			if (offset < p->lowest_bit)
				p->lowest_bit = offset;
			if (offset > p->highest_bit)
				p->highest_bit = offset;
			if (p->prio > swap_info[swap_list.next].prio)
				swap_list.next = p - swap_info;
			nr_swap_pages++;
			p->inuse_pages--;
			mem_cgroup_uncharge_swap(ent);
		}
	}
	return count;
}

/*
 * Caller has made sure that the swapdevice corresponding to entry
 * is still around or has not been recycled.
 */
void swap_free(swp_entry_t entry)
{
	struct swap_info_struct * p;

	p = swap_info_get(entry);
	if (p) {
		/* swap_info_get() took swap_lock; release it here */
		swap_entry_free(p, entry);
		spin_unlock(&swap_lock);
	}
}
/*
 * Trailing code-viewer UI text (translated from Chinese), not part of the
 * kernel source:
 *   Keyboard shortcuts — Copy code: Ctrl+C; Search code: Ctrl+F;
 *   Fullscreen: F11; Toggle theme: Ctrl+Shift+D; Show shortcuts: ?;
 *   Increase font size: Ctrl+=; Decrease font size: Ctrl+-.
 */