📄 swapfile.c
字号:
/*
* linux/mm/swapfile.c
*
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
* Swap reorganised 29.12.95, Stephen Tweedie
*/
#include <linux/malloc.h>
#include <linux/smp_lock.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/blkdev.h> /* for blk_size */
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/shm.h>
#include <asm/pgtable.h>
/*
 * Global spinlock for the swap list state.
 * NOTE(review): presumably this is what swap_list_lock()/swap_list_unlock()
 * take; those helpers are defined outside this file -- confirm.
 */
spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
/* Upper bound on valid swap types: __swap_free() rejects any type >= this. */
unsigned int nr_swapfiles;
/*
 * List of active swap areas, as indices into swap_info[].  Both fields
 * start at -1; a negative index is treated as "no entry" by every walker
 * of the list in this file.
 */
struct swap_list_t swap_list = {-1, -1};
/* Per-area state, indexed by swap type. */
struct swap_info_struct swap_info[MAX_SWAPFILES];
/*
 * Value loaded into si->cluster_nr by scan_swap_map() each time it starts
 * a new cluster; each further sequential allocation decrements it.
 */
#define SWAPFILE_CLUSTER 256
/*
 * Pick a free slot in si->swap_map, set its use count to @count and
 * return the slot's offset.  A return value of 0 means that no free slot
 * was found (the caller in this file tests "if (offset)").
 *
 * Side effects on success: nr_swap_pages is decremented,
 * si->cluster_next is set to the slot after the one returned, and
 * si->lowest_bit / si->highest_bit are tightened when the slot sat
 * exactly on one of those boundaries.
 *
 * The only caller visible in this file, __get_swap_page(), holds
 * swap_device_lock(si) around the call.
 */
static inline int scan_swap_map(struct swap_info_struct *si, unsigned short count)
{
unsigned long offset;
/*
* We try to cluster swap pages by allocating them
* sequentially in swap. Once we've allocated
* SWAPFILE_CLUSTER pages this way, however, we resort to
* first-free allocation, starting a new cluster. This
* prevents us from scattering swap pages all over the entire
* swap partition, so that we reduce overall disk seek times
* between swap pages. -- sct */
/* Phase 1: continue the current cluster from cluster_next onwards. */
if (si->cluster_nr) {
while (si->cluster_next <= si->highest_bit) {
offset = si->cluster_next++;
if (si->swap_map[offset])
continue;
si->cluster_nr--;
goto got_page;
}
}
/* Current cluster is used up (or ran off the end): begin a new one. */
si->cluster_nr = SWAPFILE_CLUSTER;
/* Phase 2: try to find an empty (not necessarily aligned) cluster. */
offset = si->lowest_bit;
check_next_cluster:
if (offset+SWAPFILE_CLUSTER-1 <= si->highest_bit)
{
int nr;
/* Any used slot in the window restarts the search just past it. */
for (nr = offset; nr < offset+SWAPFILE_CLUSTER; nr++)
if (si->swap_map[nr])
{
offset = nr+1;
goto check_next_cluster;
}
/* We found a completely empty cluster, so start
* using it.
*/
goto got_page;
}
/* Phase 3: no luck, so now go fine-grained as usual. -Andrea */
for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
if (si->swap_map[offset])
continue;
/*
 * Common allocation tail.  It is reached by falling through here and
 * also by "goto got_page" from phases 1 and 2, with offset already set.
 */
got_page:
if (offset == si->lowest_bit)
si->lowest_bit++;
if (offset == si->highest_bit)
si->highest_bit--;
si->swap_map[offset] = count;
nr_swap_pages--;
si->cluster_next = offset+1;
return offset;
}
/* Nothing free between lowest_bit and highest_bit. */
return 0;
}
/*
 * Allocate one swap slot and return the matching swap entry, with the
 * slot's use count initialised to @count.
 *
 * Returns an entry whose .val is 0 when nothing could be allocated:
 * @count is not below SWAP_MAP_MAX, the swap list is empty,
 * nr_swap_pages is zero, or no writable area had a free slot.
 *
 * Areas are tried starting at swap_list.next.  While the walk has not
 * wrapped, reaching the end of the list or an area of a different
 * priority restarts the walk once from swap_list.head; after that the
 * list is followed to its end.  On success swap_list.next is advanced to
 * the following area if it has the same priority, otherwise it is reset
 * to swap_list.head.
 */
swp_entry_t __get_swap_page(unsigned short count)
{
	struct swap_info_struct * p;
	unsigned long offset;
	swp_entry_t entry;
	int type, wrapped = 0;

	entry.val = 0;	/* Out of memory */
	if (count >= SWAP_MAP_MAX) {
		/*
		 * swap_list_lock() has not been taken yet, so this path must
		 * return directly rather than go through "out", which
		 * unlocks the swap list.
		 */
		printk(KERN_ERR "get_swap_page: bad count %hd from %p\n",
		       count, __builtin_return_address(0));
		return entry;
	}

	swap_list_lock();
	type = swap_list.next;
	if (type < 0)
		goto out;
	if (nr_swap_pages == 0)
		goto out;

	while (1) {
		p = &swap_info[type];
		/* Only areas with every SWP_WRITEOK bit set may be allocated from. */
		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
			swap_device_lock(p);
			offset = scan_swap_map(p, count);
			swap_device_unlock(p);
			if (offset) {
				entry = SWP_ENTRY(type,offset);
				/* Choose where the next allocation starts. */
				type = swap_info[type].next;
				if (type < 0 ||
				    p->prio != swap_info[type].prio) {
					swap_list.next = swap_list.head;
				} else {
					swap_list.next = type;
				}
				goto out;
			}
		}
		type = p->next;
		if (!wrapped) {
			if (type < 0 || p->prio != swap_info[type].prio) {
				type = swap_list.head;
				wrapped = 1;
			}
		} else
			if (type < 0)
				goto out;	/* out of swap space */
	}
out:
	swap_list_unlock();
	return entry;
}
/*
 * Drop @count references from the swap slot named by @entry.
 *
 * The caller guarantees that the swap device behind @entry is still
 * present and has not been recycled for another use.
 *
 * A zero entry is silently ignored.  An out-of-range type, an area
 * without SWP_USED, an offset at or beyond the area's max, or a slot
 * whose count is already zero is reported with printk and otherwise
 * ignored.  Slots whose count has reached SWAP_MAP_MAX are left alone.
 * When the count drops to zero the slot becomes free: lowest_bit and
 * highest_bit are widened to include it and nr_swap_pages is bumped.
 */
void __swap_free(swp_entry_t entry, unsigned short count)
{
	struct swap_info_struct *si;
	unsigned long slot, type;

	if (!entry.val)
		return;

	type = SWP_TYPE(entry);
	if (type >= nr_swapfiles) {
		printk("swap_free: Trying to free nonexistent swap-page\n");
		return;
	}

	si = &swap_info[type];
	if (!(si->flags & SWP_USED)) {
		printk("swap_free: Trying to free swap from unused swap-device\n");
		return;
	}

	slot = SWP_OFFSET(entry);
	if (slot >= si->max) {
		printk("swap_free: offset exceeds max\n");
		return;
	}

	if (!si->swap_map[slot]) {
		printk("VM: Bad swap entry %08lx\n", entry.val);
		return;
	}

	swap_list_lock();
	/*
	 * Prefer this area for the next allocation if it outranks the
	 * current candidate.
	 * NOTE(review): swap_list.next is used as an index without a check
	 * for a negative value -- confirm it cannot be -1 here.
	 */
	if (si->prio > swap_info[swap_list.next].prio)
		swap_list.next = type;

	swap_device_lock(si);
	if (si->swap_map[slot] < SWAP_MAP_MAX) {
		if (si->swap_map[slot] < count) {
			/* Asked to drop more references than exist. */
			swap_device_unlock(si);
			swap_list_unlock();
			printk(KERN_ERR "VM: Bad count %hd current count %hd\n", count, si->swap_map[slot]);
			return;
		}

		si->swap_map[slot] -= count;
		if (si->swap_map[slot] == 0) {
			/* Slot is free again: make sure the scan window covers it. */
			if (slot < si->lowest_bit)
				si->lowest_bit = slot;
			if (slot > si->highest_bit)
				si->highest_bit = slot;
			nr_swap_pages++;
		}
	}
	swap_device_unlock(si);
	swap_list_unlock();
}
/*
 * Fix up a single page-table entry on behalf of try_to_unuse().
 * @page is the in-memory copy of the swap slot named by @entry.
 *
 *  - empty pte: nothing to do;
 *  - present pte: if it already maps @page, just mark it dirty, since
 *    the swap cache copy is about to be removed; otherwise leave it;
 *  - swapped-out pte that refers to @entry: install a dirty mapping of
 *    @page built from the vma's page protection, release one swap
 *    reference, take one page reference and increment the mm's rss.
 *
 * Nothing is returned.  @address is accepted but not used here.
 */
static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *dir, swp_entry_t entry, struct page* page)
{
	pte_t old = *dir;

	if (pte_none(old))
		return;

	if (pte_present(old)) {
		/*
		 * If this entry is swap-cached, then page must already
		 * hold the right address for any copies in physical memory.
		 * We will be removing the swap cache in a moment, so the
		 * mapping has to be marked dirty.
		 */
		if (pte_page(old) == page)
			ptep_mkdirty(dir);
		return;
	}

	if (pte_to_swp_entry(old).val == entry.val) {
		set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
		swap_free(entry);
		get_page(page);
		vma->vm_mm->rss++;
	}
}
/*
 * Run unuse_pte() over every pte under one pmd entry that falls inside
 * the range [address, address + size), clipped to the span covered by
 * that pmd.  An empty pmd is skipped; a bad pmd is reported and cleared.
 *
 * @offset carries the base contributed by the upper level; together with
 * the in-pmd position it forms the address handed down to unuse_pte()
 * (relative to vma->vm_start).
 */
static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
	unsigned long address, unsigned long size, unsigned long offset,
	swp_entry_t entry, struct page* page)
{
	pte_t *ptep;
	unsigned long limit;

	if (pmd_none(*dir))
		return;
	if (pmd_bad(*dir)) {
		pmd_ERROR(*dir);
		pmd_clear(dir);
		return;
	}

	ptep = pte_offset(dir, address);
	offset += address & PMD_MASK;
	address &= ~PMD_MASK;

	limit = address + size;
	if (limit > PMD_SIZE)
		limit = PMD_SIZE;

	for (;;) {
		unuse_pte(vma, offset+address-vma->vm_start, ptep, entry, page);
		ptep++;
		address += PAGE_SIZE;
		/* Stop at the end of the range, or if the address wrapped to 0. */
		if (!address || address >= limit)
			break;
	}
}
/*
 * Run unuse_pmd() over every pmd under one pgd entry that falls inside
 * the range [address, address + size), clipped to the span covered by
 * that pgd.  An empty pgd is skipped; a bad pgd is reported and cleared.
 * An empty range after clipping is treated as a kernel bug.
 */
static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
	unsigned long address, unsigned long size,
	swp_entry_t entry, struct page* page)
{
	pmd_t *pmdp;
	unsigned long base, limit;

	if (pgd_none(*dir))
		return;
	if (pgd_bad(*dir)) {
		pgd_ERROR(*dir);
		pgd_clear(dir);
		return;
	}

	pmdp = pmd_offset(dir, address);
	base = address & PGDIR_MASK;
	address &= ~PGDIR_MASK;

	limit = address + size;
	if (limit > PGDIR_SIZE)
		limit = PGDIR_SIZE;
	if (address >= limit)
		BUG();

	for (;;) {
		unuse_pmd(vma, pmdp, address, limit - address, base, entry,
			  page);
		pmdp++;
		/* Advance to the start of the next pmd-sized region. */
		address = (address + PMD_SIZE) & PMD_MASK;
		if (!address || address >= limit)
			break;
	}
}
/*
 * Walk the page-directory entries covering one vma, starting at @pgdir
 * (the pgd entry for vma->vm_start), and hand each one to unuse_pgd().
 * A vma whose start is not below its end is treated as a kernel bug.
 */
static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
	swp_entry_t entry, struct page* page)
{
	unsigned long addr = vma->vm_start;
	unsigned long stop = vma->vm_end;

	if (addr >= stop)
		BUG();

	for (;;) {
		unuse_pgd(vma, pgdir, addr, stop - addr, entry, page);
		pgdir++;
		/* Advance to the start of the next pgd-sized region. */
		addr = (addr + PGDIR_SIZE) & PGDIR_MASK;
		if (!addr || addr >= stop)
			break;
	}
}
/*
 * Apply unuse_vma() to every vma of @mm, holding mm->page_table_lock for
 * the whole walk.  A NULL @mm is ignored.
 */
static void unuse_process(struct mm_struct * mm,
	swp_entry_t entry, struct page* page)
{
	struct vm_area_struct *area;

	if (!mm)
		return;

	spin_lock(&mm->page_table_lock);
	area = mm->mmap;
	while (area) {
		/* Each vma is walked from the pgd entry of its start address. */
		unuse_vma(area, pgd_offset(mm, area->vm_start), entry, page);
		area = area->vm_next;
	}
	spin_unlock(&mm->page_table_lock);
}
/*
* We completely avoid races by reading each swap page in advance,
* and then search for the process using it. All the necessary
* page table adjustments can then be made atomically.
*
* Repeatedly finds an in-use slot of swap area @type, brings its page
* into memory and replaces every reference to the slot (in all tasks'
* page tables and via shmem_unuse()) with that page, until no in-use
* slot is left.
*
* Returns 0 once the scan finds no more in-use slots, or -ENOMEM if
* read_swap_cache() could not provide a page.
*/
static int try_to_unuse(unsigned int type)
{
struct swap_info_struct * si = &swap_info[type];
struct task_struct *p;
struct page *page;
swp_entry_t entry;
int i;
while (1) {
/*
* Find a swap page in use and read it in.
* The scan restarts at slot 1 on every pass; slot 0 is never examined.
*/
swap_device_lock(si);
for (i = 1; i < si->max ; i++) {
if (si->swap_map[i] > 0 && si->swap_map[i] != SWAP_MAP_BAD) {
/*
* Prevent swaphandle from being completely
* unused by swap_free while we are trying
* to read in the page - this prevents warning
* messages from rw_swap_page_base.
*/
if (si->swap_map[i] != SWAP_MAP_MAX)
si->swap_map[i]++;
/* The device lock is dropped here, before jumping out of the loop. */
swap_device_unlock(si);
goto found_entry;
}
}
/* No in-use slot left: release the lock and finish. */
swap_device_unlock(si);
break;
found_entry:
entry = SWP_ENTRY(type, i);
/* Get a page for the entry, using the existing swap
cache page if there is one. Otherwise, get a clean
page and read the swap into it. */
page = read_swap_cache(entry);
if (!page) {
/* Give back the extra count taken during the scan above. */
swap_free(entry);
return -ENOMEM;
}
if (PageSwapCache(page))
delete_from_swap_cache(page);
/* Replace references to the entry in every task's address space. */
read_lock(&tasklist_lock);
for_each_task(p)
unuse_process(p->mm, entry, page);
read_unlock(&tasklist_lock);
shmem_unuse(entry, page);
/* Now get rid of the extra reference to the temporary
page we've been using. */
page_cache_release(page);
/* Drop the extra count taken during the scan above. */
swap_free(entry);
/*
* Check for and clear any overflowed swap map counts.
* Any count still left on the slot is forced to zero; this is
* reported as an "undead" entry unless the count was SWAP_MAP_MAX.
*/
swap_list_lock();
swap_device_lock(si);
if (si->swap_map[i] > 0) {
if (si->swap_map[i] != SWAP_MAP_MAX)
printk("VM: Undead swap entry %08lx\n",
entry.val);
nr_swap_pages++;
si->swap_map[i] = 0;
}
swap_device_unlock(si);
swap_list_unlock();
}
return 0;
}
/*
 * swapoff system call: stop using the swap area named by @specialfile.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, the error from
 * user_path_walk() if the path cannot be resolved, -EINVAL if no active
 * (SWP_WRITEOK) swap area matches the path, or the error from
 * try_to_unuse() -- in which case the area is put back on the swap list
 * and made writable again.
 */
asmlinkage long sys_swapoff(const char * specialfile)
{
struct swap_info_struct * p = NULL;
struct nameidata nd;
int i, type, prev;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
err = user_path_walk(specialfile, &nd);
if (err)
goto out;
lock_kernel();
prev = -1;
swap_list_lock();
/*
 * Look for the area on the swap list, remembering its predecessor in
 * prev so that it can be unlinked below.
 */
for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
p = swap_info + type;
if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
if (p->swap_file) {
/* Area with a swap_file dentry: match on the dentry itself. */
if (p->swap_file == nd.dentry)
break;
} else {
/* No swap_file dentry: match a block device by device number. */
if (S_ISBLK(nd.dentry->d_inode->i_mode)
&& (p->swap_device == nd.dentry->d_inode->i_rdev))
break;
}
}
prev = type;
}
err = -EINVAL;
if (type < 0) {
/* Walked off the end of the list without a match. */
swap_list_unlock();
goto out_dput;
}
/* Unlink the area from the list. */
if (prev < 0) {
swap_list.head = p->next;
} else {
swap_info[prev].next = p->next;
}
if (type == swap_list.next) {
/* just pick something that's safe... */
swap_list.next = swap_list.head;
}
nr_swap_pages -= p->pages;
swap_list_unlock();
/*
 * Leave only SWP_USED set: __get_swap_page() requires SWP_WRITEOK, so no
 * new slots are handed out from this area while it is being emptied.
 */
p->flags = SWP_USED;
err = try_to_unuse(type);
if (err) {
/* re-insert swap space back into swap_list */
swap_list_lock();
/* Insert in front of the first area whose priority is not higher. */
for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next)
if (p->prio >= swap_info[i].prio)
break;
p->next = i;
if (prev < 0)
swap_list.head = swap_list.next = p - swap_info;
else
swap_info[prev].next = p - swap_info;
nr_swap_pages += p->pages;
swap_list_unlock();
p->flags = SWP_WRITEOK;
goto out_dput;
}
if (p->swap_device)
blkdev_put(nd.dentry->d_inode->i_bdev, BDEV_SWAP);
/* Drop the references obtained by the path lookup above... */
path_release(&nd);
/*
 * ...then reuse nd to carry the area's own dentry/vfsmount, so that the
 * path_release() at out_dput drops those references.
 */
nd.dentry = p->swap_file;
p->swap_file = NULL;
nd.mnt = p->swap_vfsmnt;
p->swap_vfsmnt = NULL;
p->swap_device = 0;
vfree(p->swap_map);
p->swap_map = NULL;
/* Clearing the flags marks the swap_info slot as no longer in use. */
p->flags = 0;
err = 0;
out_dput:
unlock_kernel();
path_release(&nd);
out:
return err;
}
int get_swaparea_info(char *buf)
{
char * page = (char *) __get_free_page(GFP_KERNEL);
struct swap_info_struct *ptr = swap_info;
int i, j, len = 0, usedswap;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -