📄 swapfile.c

📁 最新最稳定的Linux内存管理模块源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
				prev_mm = mm;

				cond_resched();

				/*
				 * Sample the swap count before trying this mm so
				 * that a drop can be detected afterwards.
				 */
				swcount = *swap_map;
				if (swcount <= 1)
					;
				else if (mm == &init_mm) {
					set_start_mm = 1;
					shmem = shmem_unuse(entry, page);
				} else
					retval = unuse_mm(mm, entry, page);
				/*
				 * The count dropped since it was sampled: take a
				 * reference on this mm and remember it as the
				 * next starting mm, releasing the previous
				 * candidate.
				 */
				if (set_start_mm && *swap_map < swcount) {
					mmput(new_start_mm);
					atomic_inc(&mm->mm_users);
					new_start_mm = mm;
					set_start_mm = 0;
				}
				spin_lock(&mmlist_lock);
			}
			spin_unlock(&mmlist_lock);
			mmput(prev_mm);
			mmput(start_mm);
			start_mm = new_start_mm;
		}
		if (shmem) {
			/* page has already been unlocked and released */
			if (shmem > 0)
				continue;
			retval = shmem;
			break;
		}
		if (retval) {
			/* Error: drop the page lock and reference, then stop. */
			unlock_page(page);
			page_cache_release(page);
			break;
		}

		/*
		 * How could swap count reach 0x7fff when the maximum
		 * pid is 0x7fff, and there's no way to repeat a swap
		 * page within an mm (except in shmem, where it's the
		 * shared object which takes the reference count)?
		 * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
		 *
		 * If that's wrong, then we should worry more about
		 * exit_mmap() and do_munmap() cases described above:
		 * we might be resetting SWAP_MAP_MAX too early here.
		 * We know "Undead"s can happen, they're okay, so don't
		 * report them; but do report if we reset SWAP_MAP_MAX.
		 */
		if (*swap_map == SWAP_MAP_MAX) {
			spin_lock(&swap_lock);
			*swap_map = 1;
			spin_unlock(&swap_lock);
			reset_overflow = 1;
		}

		/*
		 * If a reference remains (rare), we would like to leave
		 * the page in the swap cache; but try_to_unmap could
		 * then re-duplicate the entry once we drop page lock,
		 * so we might loop indefinitely; also, that page could
		 * not be swapped out to other storage meanwhile.  So:
		 * delete from cache even if there's another reference,
		 * after ensuring that the data has been saved to disk -
		 * since if the reference remains (rarer), it will be
		 * read from disk into another page.  Splitting into two
		 * pages would be incorrect if swap supported "shared
		 * private" pages, but they are handled by tmpfs files.
		 */
		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
			struct writeback_control wbc = {
				.sync_mode = WB_SYNC_NONE,
			};

			/*
			 * NOTE(review): the page is re-locked right after
			 * swap_writepage(), so presumably that call returns
			 * with the page unlocked - confirm.
			 */
			swap_writepage(page, &wbc);
			lock_page(page);
			wait_on_page_writeback(page);
		}

		/*
		 * It is conceivable that a racing task removed this page from
		 * swap cache just before we acquired the page lock at the top,
		 * or while we dropped it in unuse_mm().  The page might even
		 * be back in swap cache on another swap area: that we must not
		 * delete, since it may not have been written out to swap yet.
		 */
		if (PageSwapCache(page) &&
		    likely(page_private(page) == entry.val))
			delete_from_swap_cache(page);

		/*
		 * So we could skip searching mms once swap count went
		 * to 1, we did not mark any present ptes as dirty: must
		 * mark page dirty so shrink_page_list will preserve it.
		 */
		SetPageDirty(page);
		unlock_page(page);
		page_cache_release(page);

		/*
		 * Make sure that we aren't completely killing
		 * interactive performance.
		 */
		cond_resched();
	}

	/* Drop the reference still held on the current starting mm. */
	mmput(start_mm);
	if (reset_overflow) {
		printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
		swap_overflow = 0;
	}
	return retval;
}

/*
 * After a successful try_to_unuse, if no swap is now in use, we know
 * we can empty the mmlist.  swap_lock must be held on entry and exit.
 * Note that mmlist_lock nests inside swap_lock, and an mm must be
 * added to the mmlist just after page_duplicate - before would be racy.
 *
 * Returns without touching the list if any of the first nr_swapfiles
 * entries of swap_info[] still has a non-zero inuse_pages.
 */
static void drain_mmlist(void)
{
	struct list_head *p, *next;
	unsigned int i;

	/* Any swap area still holding pages?  Then leave the mmlist alone. */
	for (i = 0; i < nr_swapfiles; i++)
		if (swap_info[i].inuse_pages)
			return;
	/* Unlink every entry hanging off init_mm.mmlist. */
	spin_lock(&mmlist_lock);
	list_for_each_safe(p, next, &init_mm.mmlist)
		list_del_init(p);
	spin_unlock(&mmlist_lock);
}

/*
 * Use this swapdev's extent info to locate the (PAGE_SIZE) block which
 * corresponds to page offset `offset'.
*/sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset){	struct swap_extent *se = sis->curr_swap_extent;	struct swap_extent *start_se = se;	for ( ; ; ) {		struct list_head *lh;		if (se->start_page <= offset &&				offset < (se->start_page + se->nr_pages)) {			return se->start_block + (offset - se->start_page);		}		lh = se->list.next;		if (lh == &sis->extent_list)			lh = lh->next;		se = list_entry(lh, struct swap_extent, list);		sis->curr_swap_extent = se;		BUG_ON(se == start_se);		/* It *must* be present */	}}#ifdef CONFIG_HIBERNATION/* * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev * corresponding to given index in swap_info (swap type). */sector_t swapdev_block(int swap_type, pgoff_t offset){	struct swap_info_struct *sis;	if (swap_type >= nr_swapfiles)		return 0;	sis = swap_info + swap_type;	return (sis->flags & SWP_WRITEOK) ? map_swap_page(sis, offset) : 0;}#endif /* CONFIG_HIBERNATION *//* * Free all of a swapdev's extent information */static void destroy_swap_extents(struct swap_info_struct *sis){	while (!list_empty(&sis->extent_list)) {		struct swap_extent *se;		se = list_entry(sis->extent_list.next,				struct swap_extent, list);		list_del(&se->list);		kfree(se);	}}/* * Add a block range (and the corresponding page range) into this swapdev's * extent list.  The extent list is kept sorted in page order. * * This function rather assumes that it is called in ascending page order. */static intadd_swap_extent(struct swap_info_struct *sis, unsigned long start_page,		unsigned long nr_pages, sector_t start_block){	struct swap_extent *se;	struct swap_extent *new_se;	struct list_head *lh;	lh = sis->extent_list.prev;	/* The highest page extent */	if (lh != &sis->extent_list) {		se = list_entry(lh, struct swap_extent, list);		BUG_ON(se->start_page + se->nr_pages != start_page);		if (se->start_block + se->nr_pages == start_block) {			/* Merge it */			se->nr_pages += nr_pages;			return 0;		}	}	/*	 * No merge.  
Insert a new extent, preserving ordering.	 */	new_se = kmalloc(sizeof(*se), GFP_KERNEL);	if (new_se == NULL)		return -ENOMEM;	new_se->start_page = start_page;	new_se->nr_pages = nr_pages;	new_se->start_block = start_block;	list_add_tail(&new_se->list, &sis->extent_list);	return 1;}/* * A `swap extent' is a simple thing which maps a contiguous range of pages * onto a contiguous range of disk blocks.  An ordered list of swap extents * is built at swapon time and is then used at swap_writepage/swap_readpage * time for locating where on disk a page belongs. * * If the swapfile is an S_ISBLK block device, a single extent is installed. * This is done so that the main operating code can treat S_ISBLK and S_ISREG * swap files identically. * * Whether the swapdev is an S_ISREG file or an S_ISBLK blockdev, the swap * extent list operates in PAGE_SIZE disk blocks.  Both S_ISREG and S_ISBLK * swapfiles are handled *identically* after swapon time. * * For S_ISREG swapfiles, setup_swap_extents() will walk all the file's blocks * and will parse them into an ordered extent list, in PAGE_SIZE chunks.  If * some stray blocks are found which do not fall within the PAGE_SIZE alignment * requirements, they are simply tossed out - we will never use those blocks * for swapping. * * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon.  This * prevents root from shooting her foot off by ftruncating an in-use swapfile, * which will scribble on the fs. * * The amount of disk space which a single swap extent represents varies. * Typically it is in the 1-4 megabyte range.  So we can have hundreds of * extents in the list.  To avoid much list walking, we cache the previous * search location in `curr_swap_extent', and start new searches from there. * This is extremely effective.  The average number of iterations in * map_swap_page() has been measured at about 0.3 per page.  - akpm. 
 */
/*
 * Build the extent list for swap area `sis'.
 *
 * Block device: a single extent covering sis->max pages from block 0 is
 * added and *span is set to sis->pages.
 *
 * Otherwise: the file is probed with bmap() one page-sized group of blocks
 * at a time; every group that is aligned and contiguous on disk becomes one
 * page of swap.  On completion sis->max, sis->pages and sis->highest_bit
 * are rewritten from the number of pages found, and *span is set to
 * 1 + highest_block - lowest_block (the header page, page 0, is excluded
 * from both bounds).
 *
 * Returns the number of new extents created (for a block device, whatever
 * add_swap_extent() returned), -EINVAL if bmap() returns 0 for a probed
 * block, or a negative error from add_swap_extent().
 */
static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
{
	struct inode *inode;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;
	int nr_extents = 0;
	int ret;

	inode = sis->swap_file->f_mapping->host;
	if (S_ISBLK(inode->i_mode)) {
		ret = add_swap_extent(sis, 0, sis->max, 0);
		*span = sis->pages;
		goto done;
	}

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/*
	 * Map all the blocks into the extent list.  This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = i_size_read(inode) >> blkbits;
	while ((probe_block + blocks_per_page) <= last_block &&
			page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;

		first_block = bmap(inode, probe_block);
		if (first_block == 0)
			goto bad_bmap;

		/*
		 * It must be PAGE_SIZE aligned on-disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		/* The remaining blocks of the page must follow on disk. */
		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = bmap(inode, probe_block + block_in_page);
			if (block == 0)
				goto bad_bmap;
			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		/* Convert from filesystem blocks to PAGE_SIZE blocks. */
		first_block >>= (PAGE_SHIFT - blkbits);
		if (page_no) {	/* exclude the header page */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, page_no, 1, first_block);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	ret = nr_extents;
	*span = 1 + highest_block - lowest_block;
	if (page_no == 0)
		page_no = 1;	/* force Empty message */
	sis->max = page_no;
	sis->pages = page_no - 1;
	sis->highest_bit = page_no - 1;
done:
	/* Start the lookup cache at the last extent on the list. */
	sis->curr_swap_extent = list_entry(sis->extent_list.prev,
					struct swap_extent, list);
	goto out;
bad_bmap:
	printk(KERN_ERR "swapon: swapfile has holes\n");
	ret = -EINVAL;
out:
	return ret;
}

/*
 * swapoff system call: stop swapping on the swap area backed by the file
 * named by `specialfile'.
 *
 * Returns 0 on success; -EPERM without CAP_SYS_ADMIN; the error from
 * getname() or filp_open(); -EINVAL if no SWP_WRITEOK swap area uses the
 * same mapping as the opened file; -ENOMEM if security_vm_enough_memory()
 * returns non-zero; or the error returned by try_to_unuse(), in which case
 * the area is put back on the swap list.
 */
SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
	struct swap_info_struct * p = NULL;
	unsigned short *swap_map;
	struct file *swap_file, *victim;
	struct address_space *mapping;
	struct inode *inode;
	char * pathname;
	int i, type, prev;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	pathname = getname(specialfile);
	err = PTR_ERR(pathname);
	if (IS_ERR(pathname))
		goto out;

	victim = filp_open(pathname, O_RDWR|O_LARGEFILE, 0);
	putname(pathname);
	err = PTR_ERR(victim);
	if (IS_ERR(victim))
		goto out;

	/*
	 * Find the active swap area sharing the victim's mapping; `prev'
	 * tracks its predecessor on the list so it can be unlinked below.
	 */
	mapping = victim->f_mapping;
	prev = -1;
	spin_lock(&swap_lock);
	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
		p = swap_info + type;
		if (p->flags & SWP_WRITEOK) {
			if (p->swap_file->f_mapping == mapping)
				break;
		}
		prev = type;
	}
	if (type < 0) {
		err = -EINVAL;
		spin_unlock(&swap_lock);
		goto out_dput;
	}
	/*
	 * A zero return from security_vm_enough_memory() is followed by
	 * vm_unacct_memory() for the same page count; a non-zero return
	 * fails the call.
	 */
	if (!security_vm_enough_memory(p->pages))
		vm_unacct_memory(p->pages);
	else {
		err = -ENOMEM;
		spin_unlock(&swap_lock);
		goto out_dput;
	}
	/* Unlink the area from the swap list. */
	if (prev < 0) {
		swap_list.head = p->next;
	} else {
		swap_info[prev].next = p->next;
	}
	if (type == swap_list.next) {
		/* just pick something that's safe... */
		swap_list.next = swap_list.head;
	}
	if (p->prio < 0) {
		for (i = p->next; i >= 0; i = swap_info[i].next)
			swap_info[i].prio = p->prio--;
		least_priority++;
	}
	nr_swap_pages -= p->pages;
	total_swap_pages -= p->pages;
	p->flags &= ~SWP_WRITEOK;
	spin_unlock(&swap_lock);

	/* PF_SWAPOFF is set on the current task only around try_to_unuse(). */
	current->flags |= PF_SWAPOFF;
	err = try_to_unuse(type);
	current->flags &= ~PF_SWAPOFF;

	if (err) {
		/* re-insert swap space back into swap_list */
		spin_lock(&swap_lock);
		if (p->prio < 0)
			p->prio = --least_priority;
		prev = -1;
		/* Insert ahead of the first entry whose prio is not higher. */
		for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
			if (p->prio >= swap_info[i].prio)
				break;
			prev = i;
		}
		p->next = i;
		if (prev < 0)
			swap_list.head = swap_list.next = p - swap_info;
		else
			swap_info[prev].next = p - swap_info;
		nr_swap_pages += p->pages;
		total_swap_pages += p->pages;
		p->flags |= SWP_WRITEOK;
		spin_unlock(&swap_lock);
		goto out_dput;
	}

	/* wait for any unplug function to finish */
	down_write(&swap_unplug_sem);
	up_write(&swap_unplug_sem);

	destroy_swap_extents(p);
	mutex_lock(&swapon_mutex);
	spin_lock(&swap_lock);
	drain_mmlist();

	/* wait for anyone still in scan_swap_map */
	p->highest_bit = 0;		/* cuts scans short */
	while (p->flags >= SWP_SCANNING) {
		spin_unlock(&swap_lock);
		schedule_timeout_uninterruptible(1);
		spin_lock(&swap_lock);
	}

	/*
	 * Detach the file and the swap map from the swap_info entry while
	 * the locks are held; they are released after the locks are dropped.
	 */
	swap_file = p->swap_file;
	p->swap_file = NULL;
	p->max = 0;
	swap_map = p->swap_map;
	p->swap_map = NULL;
	p->flags = 0;
	spin_unlock(&swap_lock);
	mutex_unlock(&swapon_mutex);
	vfree(swap_map);
	/* Destroy swap account information */
	swap_cgroup_swapoff(type);

	inode = mapping->host;
	if (S_ISBLK(inode->i_mode)) {
		/* Block device: restore the saved block size and release it. */
		struct block_device *bdev = I_BDEV(inode);
		set_blocksize(bdev, p->old_block_size);
		bd_release(bdev);
	} else {
		/* Regular file: clear the S_SWAPFILE inode flag. */
		mutex_lock(&inode->i_mutex);
		inode->i_flags &= ~S_SWAPFILE;
		mutex_unlock(&inode->i_mutex);
	}
	filp_close(swap_file, NULL);
	err = 0;

out_dput:
	filp_close(victim, NULL);
out:
	return err;
}

#ifdef CONFIG_PROC_FS
/* iterator */
static void *swap_start(struct seq_file *swap, loff_t *pos)
{
	struct swap_info_struct *ptr = swap_info;
	int i;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -