📄 swapfile.c
/*
 * How many references to page are currently swapped out?
 */
static inline int page_swapcount(struct page *page)
{
	int count = 0;
	struct swap_info_struct *p;
	swp_entry_t entry;

	entry.val = page_private(page);
	p = swap_info_get(entry);
	if (p) {
		/* Subtract the 1 for the swap cache itself */
		count = p->swap_map[swp_offset(entry)] - 1;
		spin_unlock(&swap_lock);
	}
	return count;
}

/*
 * We can write to an anon page without COW if there are no other references
 * to it.  And as a side-effect, free up its swap: because the old content
 * on disk will never be read, and seeking back there to write new content
 * later would only waste time away from clustering.
 */
int reuse_swap_page(struct page *page)
{
	int count;

	VM_BUG_ON(!PageLocked(page));
	count = page_mapcount(page);
	if (count <= 1 && PageSwapCache(page)) {
		count += page_swapcount(page);
		if (count == 1 && !PageWriteback(page)) {
			delete_from_swap_cache(page);
			SetPageDirty(page);
		}
	}
	return count == 1;
}

/*
 * If swap is getting full, or if there are no more mappings of this page,
 * then try_to_free_swap is called to free its swap space.
 */
int try_to_free_swap(struct page *page)
{
	VM_BUG_ON(!PageLocked(page));

	if (!PageSwapCache(page))
		return 0;
	if (PageWriteback(page))
		return 0;
	if (page_swapcount(page))
		return 0;

	delete_from_swap_cache(page);
	SetPageDirty(page);
	return 1;
}

/*
 * Free the swap entry like above, but also try to
 * free the page cache entry if it is the last user.
 */
int free_swap_and_cache(swp_entry_t entry)
{
	struct swap_info_struct *p;
	struct page *page = NULL;

	if (is_migration_entry(entry))
		return 1;

	p = swap_info_get(entry);
	if (p) {
		if (swap_entry_free(p, entry) == 1) {
			page = find_get_page(&swapper_space, entry.val);
			if (page && !trylock_page(page)) {
				page_cache_release(page);
				page = NULL;
			}
		}
		spin_unlock(&swap_lock);
	}
	if (page) {
		/*
		 * Not mapped elsewhere, or swap space full? Free it!
		 * Also recheck PageSwapCache now page is locked (above).
		 */
		if (PageSwapCache(page) && !PageWriteback(page) &&
				(!page_mapped(page) || vm_swap_full())) {
			delete_from_swap_cache(page);
			SetPageDirty(page);
		}
		unlock_page(page);
		page_cache_release(page);
	}
	return p != NULL;
}
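/*
 * Illustrative sketch, not part of swapfile.c: how a copy-on-write
 * fault path might consult reuse_swap_page() above to decide between
 * writing an anonymous page in place and copying it.  The function
 * name example_can_reuse_anon_page and the simplified locking are
 * assumptions for illustration only; in the kernel the real decision
 * is made in do_wp_page() in mm/memory.c.
 */
static int example_can_reuse_anon_page(struct page *page)
{
	int reuse = 0;

	if (PageAnon(page) && trylock_page(page)) {
		/*
		 * reuse_swap_page() requires the page lock.  It returns
		 * true only when this mapping holds the sole reference,
		 * and in that case it may also drop the page from the
		 * swap cache so the stale swap slot can be reused.
		 */
		reuse = reuse_swap_page(page);
		unlock_page(page);
	}
	return reuse;	/* nonzero: write in place; zero: fall back to COW */
}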
#ifdef CONFIG_HIBERNATION
/*
 * Find the swap type that corresponds to given device (if any).
 *
 * @offset - number of the PAGE_SIZE-sized block of the device, starting
 * from 0, in which the swap header is expected to be located.
 *
 * This is needed for the suspend to disk (aka swsusp).
 */
int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
{
	struct block_device *bdev = NULL;
	int i;

	if (device)
		bdev = bdget(device);

	spin_lock(&swap_lock);
	for (i = 0; i < nr_swapfiles; i++) {
		struct swap_info_struct *sis = swap_info + i;

		if (!(sis->flags & SWP_WRITEOK))
			continue;

		if (!bdev) {
			if (bdev_p)
				*bdev_p = bdget(sis->bdev->bd_dev);
			spin_unlock(&swap_lock);
			return i;
		}
		if (bdev == sis->bdev) {
			struct swap_extent *se;

			se = list_entry(sis->extent_list.next,
					struct swap_extent, list);
			if (se->start_block == offset) {
				if (bdev_p)
					*bdev_p = bdget(sis->bdev->bd_dev);
				spin_unlock(&swap_lock);
				bdput(bdev);
				return i;
			}
		}
	}
	spin_unlock(&swap_lock);
	if (bdev)
		bdput(bdev);

	return -ENODEV;
}

/*
 * Return either the total number of swap pages of given type, or the number
 * of free pages of that type (depending on @free)
 *
 * This is needed for software suspend
 */
unsigned int count_swap_pages(int type, int free)
{
	unsigned int n = 0;

	if (type < nr_swapfiles) {
		spin_lock(&swap_lock);
		if (swap_info[type].flags & SWP_WRITEOK) {
			n = swap_info[type].pages;
			if (free)
				n -= swap_info[type].inuse_pages;
		}
		spin_unlock(&swap_lock);
	}
	return n;
}
#endif
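#ifdef CONFIG_HIBERNATION
/*
 * Illustrative sketch, not part of swapfile.c: one way the hibernation
 * code might use the two helpers above.  swap_type_of() identifies the
 * swap area backing the resume device, and count_swap_pages() checks
 * that it still has enough free slots for the image.  The names
 * example_hibernation_swap_check, resume_device, resume_block and
 * nr_image_pages are assumptions for illustration; the real callers
 * live in kernel/power/swap.c.
 */
static int example_hibernation_swap_check(dev_t resume_device,
					  sector_t resume_block,
					  unsigned int nr_image_pages)
{
	struct block_device *bdev;
	int type;

	type = swap_type_of(resume_device, resume_block, &bdev);
	if (type < 0)
		return type;		/* no matching writable swap area */

	if (count_swap_pages(type, 1) < nr_image_pages) {
		bdput(bdev);		/* drop the reference taken above */
		return -ENOSPC;		/* not enough free swap for the image */
	}
	/* keep the bdev reference for writing the image later */
	return type;
}
#endif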
/*
 * No need to decide whether this PTE shares the swap entry with others,
 * just let do_wp_page work it out if a write is requested later - to
 * force COW, vm_page_prot omits write permission from any private vma.
 */
static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, swp_entry_t entry, struct page *page)
{
	struct mem_cgroup *ptr = NULL;
	spinlock_t *ptl;
	pte_t *pte;
	int ret = 1;

	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) {
		ret = -ENOMEM;
		goto out_nolock;
	}

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
		if (ret > 0)
			mem_cgroup_cancel_charge_swapin(ptr);
		ret = 0;
		goto out;
	}

	inc_mm_counter(vma->vm_mm, anon_rss);
	get_page(page);
	set_pte_at(vma->vm_mm, addr, pte,
		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
	page_add_anon_rmap(page, vma, addr);
	mem_cgroup_commit_charge_swapin(page, ptr);
	swap_free(entry);
	/*
	 * Move the page to the active list so it is not
	 * immediately swapped out again after swapon.
	 */
	activate_page(page);
out:
	pte_unmap_unlock(pte, ptl);
out_nolock:
	return ret;
}

static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end,
				swp_entry_t entry, struct page *page)
{
	pte_t swp_pte = swp_entry_to_pte(entry);
	pte_t *pte;
	int ret = 0;

	/*
	 * We don't actually need pte lock while scanning for swp_pte: since
	 * we hold page lock and mmap_sem, swp_pte cannot be inserted into the
	 * page table while we're scanning; though it could get zapped, and on
	 * some architectures (e.g. x86_32 with PAE) we might catch a glimpse
	 * of unmatched parts which look like swp_pte, so unuse_pte must
	 * recheck under pte lock.  Scanning without pte lock lets it be
	 * preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
	 */
	pte = pte_offset_map(pmd, addr);
	do {
		/*
		 * swapoff spends a _lot_ of time in this loop!
		 * Test inline before going to call unuse_pte.
		 */
		if (unlikely(pte_same(*pte, swp_pte))) {
			pte_unmap(pte);
			ret = unuse_pte(vma, pmd, addr, entry, page);
			if (ret)
				goto out;
			pte = pte_offset_map(pmd, addr);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap(pte - 1);
out:
	return ret;
}

static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
				unsigned long addr, unsigned long end,
				swp_entry_t entry, struct page *page)
{
	pmd_t *pmd;
	unsigned long next;
	int ret;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
		if (ret)
			return ret;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
				unsigned long addr, unsigned long end,
				swp_entry_t entry, struct page *page)
{
	pud_t *pud;
	unsigned long next;
	int ret;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		ret = unuse_pmd_range(vma, pud, addr, next, entry, page);
		if (ret)
			return ret;
	} while (pud++, addr = next, addr != end);
	return 0;
}

static int unuse_vma(struct vm_area_struct *vma,
				swp_entry_t entry, struct page *page)
{
	pgd_t *pgd;
	unsigned long addr, end, next;
	int ret;

	if (page->mapping) {
		addr = page_address_in_vma(page, vma);
		if (addr == -EFAULT)
			return 0;
		else
			end = addr + PAGE_SIZE;
	} else {
		addr = vma->vm_start;
		end = vma->vm_end;
	}

	pgd = pgd_offset(vma->vm_mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		ret = unuse_pud_range(vma, pgd, addr, next, entry, page);
		if (ret)
			return ret;
	} while (pgd++, addr = next, addr != end);
	return 0;
}

static int unuse_mm(struct mm_struct *mm,
				swp_entry_t entry, struct page *page)
{
	struct vm_area_struct *vma;
	int ret = 0;

	if (!down_read_trylock(&mm->mmap_sem)) {
		/*
		 * Activate page so shrink_inactive_list is unlikely to unmap
		 * its ptes while lock is dropped, so swapoff can make progress.
		 */
		activate_page(page);
		unlock_page(page);
		down_read(&mm->mmap_sem);
		lock_page(page);
	}
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
			break;
	}
	up_read(&mm->mmap_sem);
	return (ret < 0)? ret: 0;
}

/*
 * Scan swap_map from current position to next entry still in use.
 * Recycle to start on reaching the end, returning 0 when empty.
 */
static unsigned int find_next_to_unuse(struct swap_info_struct *si,
					unsigned int prev)
{
	unsigned int max = si->max;
	unsigned int i = prev;
	int count;

	/*
	 * No need for swap_lock here: we're just looking
	 * for whether an entry is in use, not modifying it; false
	 * hits are okay, and sys_swapoff() has already prevented new
	 * allocations from this area (while holding swap_lock).
	 */
	for (;;) {
		if (++i >= max) {
			if (!prev) {
				i = 0;
				break;
			}
			/*
			 * No entries in use at top of swap_map,
			 * loop back to start and recheck there.
			 */
			max = prev + 1;
			prev = 0;
			i = 1;
		}
		count = si->swap_map[i];
		if (count && count != SWAP_MAP_BAD)
			break;
	}
	return i;
}
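/*
 * Illustrative sketch, not part of swapfile.c: the same pte-level scan
 * pattern that unuse_pte_range() above uses, reduced to a helper that
 * counts non-present (swapped-out or file-backed) ptes under one pmd.
 * The name example_count_nonpresent_ptes is an assumption for
 * illustration only.
 */
static int example_count_nonpresent_ptes(struct vm_area_struct *vma,
					 pmd_t *pmd, unsigned long addr,
					 unsigned long end)
{
	spinlock_t *ptl;
	pte_t *pte;
	int count = 0;

	/*
	 * Unlike unuse_pte_range(), take the pte lock up front: this
	 * sketch only reads each pte once and never sleeps in the loop.
	 */
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	do {
		if (!pte_none(*pte) && !pte_present(*pte))
			count++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);

	return count;
}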
/*
 * We completely avoid races by reading each swap page in advance,
 * and then search for the process using it.  All the necessary
 * page table adjustments can then be made atomically.
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct mm_struct *start_mm;
	unsigned short *swap_map;
	unsigned short swcount;
	struct page *page;
	swp_entry_t entry;
	unsigned int i = 0;
	int retval = 0;
	int reset_overflow = 0;
	int shmem;

	/*
	 * When searching mms for an entry, a good strategy is to
	 * start at the first mm we freed the previous entry from
	 * (though actually we don't notice whether we or coincidence
	 * freed the entry).  Initialize this start_mm with a hold.
	 *
	 * A simpler strategy would be to start at the last mm we
	 * freed the previous entry from; but that would take less
	 * advantage of mmlist ordering, which clusters forked mms
	 * together, child after parent.  If we race with dup_mmap(), we
	 * prefer to resolve parent before child, lest we miss entries
	 * duplicated after we scanned child: using last mm would invert
	 * that.  Though it's only a serious concern when an overflowed
	 * swap count is reset from SWAP_MAP_MAX, preventing a rescan.
	 */
	start_mm = &init_mm;
	atomic_inc(&init_mm.mm_users);

	/*
	 * Keep on scanning until all entries have gone.  Usually,
	 * one pass through swap_map is enough, but not necessarily:
	 * there are races when an instance of an entry might be missed.
	 */
	while ((i = find_next_to_unuse(si, i)) != 0) {
		if (signal_pending(current)) {
			retval = -EINTR;
			break;
		}

		/*
		 * Get a page for the entry, using the existing swap
		 * cache page if there is one.  Otherwise, get a clean
		 * page and read the swap into it.
		 */
		swap_map = &si->swap_map[i];
		entry = swp_entry(type, i);
		page = read_swap_cache_async(entry,
					GFP_HIGHUSER_MOVABLE, NULL, 0);
		if (!page) {
			/*
			 * Either swap_duplicate() failed because entry
			 * has been freed independently, and will not be
			 * reused since sys_swapoff() already disabled
			 * allocation from here, or alloc_page() failed.
			 */
			if (!*swap_map)
				continue;
			retval = -ENOMEM;
			break;
		}

		/*
		 * Don't hold on to start_mm if it looks like exiting.
		 */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			atomic_inc(&init_mm.mm_users);
		}

		/*
		 * Wait for and lock page.  When do_swap_page races with
		 * try_to_unuse, do_swap_page can handle the fault much
		 * faster than try_to_unuse can locate the entry.  This
		 * apparently redundant "wait_on_page_locked" lets try_to_unuse
		 * defer to do_swap_page in such a case - in some tests,
		 * do_swap_page and try_to_unuse repeatedly compete.
		 */
		wait_on_page_locked(page);
		wait_on_page_writeback(page);
		lock_page(page);
		wait_on_page_writeback(page);

		/*
		 * Remove all references to entry.
		 * Whenever we reach init_mm, there's no address space
		 * to search, but use it as a reminder to search shmem.
		 */
		shmem = 0;
		swcount = *swap_map;
		if (swcount > 1) {
			if (start_mm == &init_mm)
				shmem = shmem_unuse(entry, page);
			else
				retval = unuse_mm(start_mm, entry, page);
		}
		if (*swap_map > 1) {
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *prev_mm = start_mm;
			struct mm_struct *mm;

			atomic_inc(&new_start_mm->mm_users);
			atomic_inc(&prev_mm->mm_users);
			spin_lock(&mmlist_lock);
			while (*swap_map > 1 && !retval && !shmem &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				if (!atomic_inc_not_zero(&mm->mm_users))
					continue;
				spin_unlock(&mmlist_lock);
				mmput(prev_mm);
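/*
 * Illustrative sketch, not part of swapfile.c (the listing above breaks
 * off inside try_to_unuse()): the overall shape of the swapoff path
 * that drives try_to_unuse().  In the kernel this logic lives in
 * sys_swapoff(); the name example_disable_swap_area and the reduced
 * error handling are simplified assumptions for illustration.
 */
static int example_disable_swap_area(unsigned int type)
{
	struct swap_info_struct *p = &swap_info[type];
	int err;

	/* Stop new allocations from this area before draining it. */
	spin_lock(&swap_lock);
	p->flags &= ~SWP_WRITEOK;
	spin_unlock(&swap_lock);

	/* Bring every swapped-out page back in and rewrite the ptes. */
	err = try_to_unuse(type);
	if (err) {
		/* Failed (e.g. -ENOMEM or a signal): re-enable the area. */
		spin_lock(&swap_lock);
		p->flags |= SWP_WRITEOK;
		spin_unlock(&swap_lock);
	}
	return err;
}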