/* Source-viewer header (not part of the code): file "swapfile.c"; control label "Font size:". */
prev_mm = mm; cond_resched(); swcount = *swap_map; if (swcount <= 1) ; else if (mm == &init_mm) { set_start_mm = 1; shmem = shmem_unuse(entry, page); } else retval = unuse_mm(mm, entry, page); if (set_start_mm && *swap_map < swcount) { mmput(new_start_mm); atomic_inc(&mm->mm_users); new_start_mm = mm; set_start_mm = 0; } spin_lock(&mmlist_lock); } spin_unlock(&mmlist_lock); mmput(prev_mm); mmput(start_mm); start_mm = new_start_mm; } if (shmem) { /* page has already been unlocked and released */ if (shmem > 0) continue; retval = shmem; break; } if (retval) { unlock_page(page); page_cache_release(page); break; } /* * How could swap count reach 0x7fff when the maximum * pid is 0x7fff, and there's no way to repeat a swap * page within an mm (except in shmem, where it's the * shared object which takes the reference count)? * We believe SWAP_MAP_MAX cannot occur in Linux 2.4. * * If that's wrong, then we should worry more about * exit_mmap() and do_munmap() cases described above: * we might be resetting SWAP_MAP_MAX too early here. * We know "Undead"s can happen, they're okay, so don't * report them; but do report if we reset SWAP_MAP_MAX. */ if (*swap_map == SWAP_MAP_MAX) { spin_lock(&swap_lock); *swap_map = 1; spin_unlock(&swap_lock); reset_overflow = 1; } /* * If a reference remains (rare), we would like to leave * the page in the swap cache; but try_to_unmap could * then re-duplicate the entry once we drop page lock, * so we might loop indefinitely; also, that page could * not be swapped out to other storage meanwhile. So: * delete from cache even if there's another reference, * after ensuring that the data has been saved to disk - * since if the reference remains (rarer), it will be * read from disk into another page. Splitting into two * pages would be incorrect if swap supported "shared * private" pages, but they are handled by tmpfs files. 
*/ if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) { struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, }; swap_writepage(page, &wbc); lock_page(page); wait_on_page_writeback(page); } /* * It is conceivable that a racing task removed this page from * swap cache just before we acquired the page lock at the top, * or while we dropped it in unuse_mm(). The page might even * be back in swap cache on another swap area: that we must not * delete, since it may not have been written out to swap yet. */ if (PageSwapCache(page) && likely(page_private(page) == entry.val)) delete_from_swap_cache(page); /* * So we could skip searching mms once swap count went * to 1, we did not mark any present ptes as dirty: must * mark page dirty so shrink_page_list will preserve it. */ SetPageDirty(page); unlock_page(page); page_cache_release(page); /* * Make sure that we aren't completely killing * interactive performance. */ cond_resched(); } mmput(start_mm); if (reset_overflow) { printk(KERN_WARNING "swapoff: cleared swap entry overflow\n"); swap_overflow = 0; } return retval;}

/*
 * Empty the mmlist once no swap area has any page in use.
 *
 * Meant to be called after a successful try_to_unuse().  swap_lock must
 * be held on entry and exit.  mmlist_lock nests inside swap_lock, and an
 * mm must be added to the mmlist just after page_duplicate - before would
 * be racy.
 */
static void drain_mmlist(void)
{
	struct list_head *pos, *tmp;
	unsigned int type;

	/* Any swap area still holding pages means the mmlist is still needed. */
	for (type = 0; type < nr_swapfiles; type++) {
		if (swap_info[type].inuse_pages)
			return;
	}

	/* Unlink every entry hanging off init_mm.mmlist. */
	spin_lock(&mmlist_lock);
	list_for_each_safe(pos, tmp, &init_mm.mmlist) {
		list_del_init(pos);
	}
	spin_unlock(&mmlist_lock);
}

/*
 * Use this swapdev's extent info to locate the (PAGE_SIZE) block which
 * corresponds to page offset `offset'.
*/sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset){ struct swap_extent *se = sis->curr_swap_extent; struct swap_extent *start_se = se; for ( ; ; ) { struct list_head *lh; if (se->start_page <= offset && offset < (se->start_page + se->nr_pages)) { return se->start_block + (offset - se->start_page); } lh = se->list.next; if (lh == &sis->extent_list) lh = lh->next; se = list_entry(lh, struct swap_extent, list); sis->curr_swap_extent = se; BUG_ON(se == start_se); /* It *must* be present */ }}#ifdef CONFIG_HIBERNATION/* * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev * corresponding to given index in swap_info (swap type). */sector_t swapdev_block(int swap_type, pgoff_t offset){ struct swap_info_struct *sis; if (swap_type >= nr_swapfiles) return 0; sis = swap_info + swap_type; return (sis->flags & SWP_WRITEOK) ? map_swap_page(sis, offset) : 0;}#endif /* CONFIG_HIBERNATION *//* * Free all of a swapdev's extent information */static void destroy_swap_extents(struct swap_info_struct *sis){ while (!list_empty(&sis->extent_list)) { struct swap_extent *se; se = list_entry(sis->extent_list.next, struct swap_extent, list); list_del(&se->list); kfree(se); }}/* * Add a block range (and the corresponding page range) into this swapdev's * extent list. The extent list is kept sorted in page order. * * This function rather assumes that it is called in ascending page order. */static intadd_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block){ struct swap_extent *se; struct swap_extent *new_se; struct list_head *lh; lh = sis->extent_list.prev; /* The highest page extent */ if (lh != &sis->extent_list) { se = list_entry(lh, struct swap_extent, list); BUG_ON(se->start_page + se->nr_pages != start_page); if (se->start_block + se->nr_pages == start_block) { /* Merge it */ se->nr_pages += nr_pages; return 0; } } /* * No merge. Insert a new extent, preserving ordering. 
*/ new_se = kmalloc(sizeof(*se), GFP_KERNEL); if (new_se == NULL) return -ENOMEM; new_se->start_page = start_page; new_se->nr_pages = nr_pages; new_se->start_block = start_block; list_add_tail(&new_se->list, &sis->extent_list); return 1;}/* * A `swap extent' is a simple thing which maps a contiguous range of pages * onto a contiguous range of disk blocks. An ordered list of swap extents * is built at swapon time and is then used at swap_writepage/swap_readpage * time for locating where on disk a page belongs. * * If the swapfile is an S_ISBLK block device, a single extent is installed. * This is done so that the main operating code can treat S_ISBLK and S_ISREG * swap files identically. * * Whether the swapdev is an S_ISREG file or an S_ISBLK blockdev, the swap * extent list operates in PAGE_SIZE disk blocks. Both S_ISREG and S_ISBLK * swapfiles are handled *identically* after swapon time. * * For S_ISREG swapfiles, setup_swap_extents() will walk all the file's blocks * and will parse them into an ordered extent list, in PAGE_SIZE chunks. If * some stray blocks are found which do not fall within the PAGE_SIZE alignment * requirements, they are simply tossed out - we will never use those blocks * for swapping. * * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon. This * prevents root from shooting her foot off by ftruncating an in-use swapfile, * which will scribble on the fs. * * The amount of disk space which a single swap extent represents varies. * Typically it is in the 1-4 megabyte range. So we can have hundreds of * extents in the list. To avoid much list walking, we cache the previous * search location in `curr_swap_extent', and start new searches from there. * This is extremely effective. The average number of iterations in * map_swap_page() has been measured at about 0.3 per page. - akpm. 
*/static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span){ struct inode *inode; unsigned blocks_per_page; unsigned long page_no; unsigned blkbits; sector_t probe_block; sector_t last_block; sector_t lowest_block = -1; sector_t highest_block = 0; int nr_extents = 0; int ret; inode = sis->swap_file->f_mapping->host; if (S_ISBLK(inode->i_mode)) { ret = add_swap_extent(sis, 0, sis->max, 0); *span = sis->pages; goto done; } blkbits = inode->i_blkbits; blocks_per_page = PAGE_SIZE >> blkbits; /* * Map all the blocks into the extent list. This code doesn't try * to be very smart. */ probe_block = 0; page_no = 0; last_block = i_size_read(inode) >> blkbits; while ((probe_block + blocks_per_page) <= last_block && page_no < sis->max) { unsigned block_in_page; sector_t first_block; first_block = bmap(inode, probe_block); if (first_block == 0) goto bad_bmap; /* * It must be PAGE_SIZE aligned on-disk */ if (first_block & (blocks_per_page - 1)) { probe_block++; goto reprobe; } for (block_in_page = 1; block_in_page < blocks_per_page; block_in_page++) { sector_t block; block = bmap(inode, probe_block + block_in_page); if (block == 0) goto bad_bmap; if (block != first_block + block_in_page) { /* Discontiguity */ probe_block++; goto reprobe; } } first_block >>= (PAGE_SHIFT - blkbits); if (page_no) { /* exclude the header page */ if (first_block < lowest_block) lowest_block = first_block; if (first_block > highest_block) highest_block = first_block; } /* * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks */ ret = add_swap_extent(sis, page_no, 1, first_block); if (ret < 0) goto out; nr_extents += ret; page_no++; probe_block += blocks_per_page;reprobe: continue; } ret = nr_extents; *span = 1 + highest_block - lowest_block; if (page_no == 0) page_no = 1; /* force Empty message */ sis->max = page_no; sis->pages = page_no - 1; sis->highest_bit = page_no - 1;done: sis->curr_swap_extent = list_entry(sis->extent_list.prev, struct swap_extent, list); goto 
out;bad_bmap: printk(KERN_ERR "swapon: swapfile has holes\n"); ret = -EINVAL;out: return ret;}SYSCALL_DEFINE1(swapoff, const char __user *, specialfile){ struct swap_info_struct * p = NULL; unsigned short *swap_map; struct file *swap_file, *victim; struct address_space *mapping; struct inode *inode; char * pathname; int i, type, prev; int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; pathname = getname(specialfile); err = PTR_ERR(pathname); if (IS_ERR(pathname)) goto out; victim = filp_open(pathname, O_RDWR|O_LARGEFILE, 0); putname(pathname); err = PTR_ERR(victim); if (IS_ERR(victim)) goto out; mapping = victim->f_mapping; prev = -1; spin_lock(&swap_lock); for (type = swap_list.head; type >= 0; type = swap_info[type].next) { p = swap_info + type; if (p->flags & SWP_WRITEOK) { if (p->swap_file->f_mapping == mapping) break; } prev = type; } if (type < 0) { err = -EINVAL; spin_unlock(&swap_lock); goto out_dput; } if (!security_vm_enough_memory(p->pages)) vm_unacct_memory(p->pages); else { err = -ENOMEM; spin_unlock(&swap_lock); goto out_dput; } if (prev < 0) { swap_list.head = p->next; } else { swap_info[prev].next = p->next; } if (type == swap_list.next) { /* just pick something that's safe... 
*/ swap_list.next = swap_list.head; } if (p->prio < 0) { for (i = p->next; i >= 0; i = swap_info[i].next) swap_info[i].prio = p->prio--; least_priority++; } nr_swap_pages -= p->pages; total_swap_pages -= p->pages; p->flags &= ~SWP_WRITEOK; spin_unlock(&swap_lock); current->flags |= PF_SWAPOFF; err = try_to_unuse(type); current->flags &= ~PF_SWAPOFF; if (err) { /* re-insert swap space back into swap_list */ spin_lock(&swap_lock); if (p->prio < 0) p->prio = --least_priority; prev = -1; for (i = swap_list.head; i >= 0; i = swap_info[i].next) { if (p->prio >= swap_info[i].prio) break; prev = i; } p->next = i; if (prev < 0) swap_list.head = swap_list.next = p - swap_info; else swap_info[prev].next = p - swap_info; nr_swap_pages += p->pages; total_swap_pages += p->pages; p->flags |= SWP_WRITEOK; spin_unlock(&swap_lock); goto out_dput; } /* wait for any unplug function to finish */ down_write(&swap_unplug_sem); up_write(&swap_unplug_sem); destroy_swap_extents(p); mutex_lock(&swapon_mutex); spin_lock(&swap_lock); drain_mmlist(); /* wait for anyone still in scan_swap_map */ p->highest_bit = 0; /* cuts scans short */ while (p->flags >= SWP_SCANNING) { spin_unlock(&swap_lock); schedule_timeout_uninterruptible(1); spin_lock(&swap_lock); } swap_file = p->swap_file; p->swap_file = NULL; p->max = 0; swap_map = p->swap_map; p->swap_map = NULL; p->flags = 0; spin_unlock(&swap_lock); mutex_unlock(&swapon_mutex); vfree(swap_map); /* Destroy swap account informatin */ swap_cgroup_swapoff(type); inode = mapping->host; if (S_ISBLK(inode->i_mode)) { struct block_device *bdev = I_BDEV(inode); set_blocksize(bdev, p->old_block_size); bd_release(bdev); } else { mutex_lock(&inode->i_mutex); inode->i_flags &= ~S_SWAPFILE; mutex_unlock(&inode->i_mutex); } filp_close(swap_file, NULL); err = 0;out_dput: filp_close(victim, NULL);out: return err;}#ifdef CONFIG_PROC_FS/* iterator */static void *swap_start(struct seq_file *swap, loff_t *pos){ struct swap_info_struct *ptr = swap_info; int i;
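/*
 * Source-viewer keyboard shortcuts (page chrome, not part of swapfile.c):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */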