📄 swapfile.c

📁 linux下内存管理源代码。。。精、简、强!
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 *  linux/mm/swapfile.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */

#include <linux/malloc.h>
#include <linux/smp_lock.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/blkdev.h> /* for blk_size */
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/shm.h>

#include <asm/pgtable.h>

/*
 * Spinlock, statically initialised to the unlocked state.
 * NOTE(review): presumably the lock behind swap_list_lock() /
 * swap_list_unlock(), which are defined outside this file section -- confirm.
 */
spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
/* Swap type indices >= this value are rejected by __swap_free(). */
unsigned int nr_swapfiles;

/*
 * List bookkeeping for the swap areas; both members start out as -1.
 * The code below reads/writes swap_list.head and swap_list.next and
 * treats a negative index as "no swap area".
 */
struct swap_list_t swap_list = {-1, -1};

/* Per-area state, indexed by swap type. */
struct swap_info_struct swap_info[MAX_SWAPFILES];

/* Number of slots scan_swap_map() tries to hand out sequentially per cluster. */
#define SWAPFILE_CLUSTER 256

/*
 * Pick a free slot in si->swap_map, store `count` in it and return the
 * slot's offset.  Returns 0 when no free slot is found between
 * si->lowest_bit and si->highest_bit; the caller in this file treats a
 * zero return as "nothing allocated".
 *
 * Side effects on success: si->lowest_bit / si->highest_bit are nudged
 * past the slot if it sat on the boundary, nr_swap_pages is decremented
 * and si->cluster_next is set to the slot after the one returned.
 *
 * No locking is done here; __get_swap_page() calls this between
 * swap_device_lock() and swap_device_unlock().
 *
 * NOTE(review): the return type is int while offsets are unsigned long;
 * the signature is kept for callers, but very large offsets would not
 * survive the conversion.
 */
static inline int scan_swap_map(struct swap_info_struct *si, unsigned short count)
{
	unsigned long offset;
	/* 
	 * We try to cluster swap pages by allocating them
	 * sequentially in swap.  Once we've allocated
	 * SWAPFILE_CLUSTER pages this way, however, we resort to
	 * first-free allocation, starting a new cluster.  This
	 * prevents us from scattering swap pages all over the entire
	 * swap partition, so that we reduce overall disk seek times
	 * between swap pages.  -- sct */
	if (si->cluster_nr) {
		while (si->cluster_next <= si->highest_bit) {
			offset = si->cluster_next++;
			if (si->swap_map[offset])
				continue;
			si->cluster_nr--;
			goto got_page;
		}
	}
	/* Current cluster is used up (or ran off the end): start a new one. */
	si->cluster_nr = SWAPFILE_CLUSTER;

	/* try to find an empty (even not aligned) cluster. */
	offset = si->lowest_bit;
 check_next_cluster:
	if (offset+SWAPFILE_CLUSTER-1 <= si->highest_bit)
	{
		/*
		 * Same type as offset: the index is assigned from and
		 * compared against an unsigned long, so an int here would
		 * mix signedness and truncate large offsets.
		 */
		unsigned long nr;
		for (nr = offset; nr < offset+SWAPFILE_CLUSTER; nr++)
			if (si->swap_map[nr])
			{
				/* Slot in use: retry with a window starting just after it. */
				offset = nr+1;
				goto check_next_cluster;
			}
		/* We found a completely empty cluster, so start
		 * using it.
		 */
		goto got_page;
	}
	/* No luck, so now go fine-grained as usual. -Andrea */
	for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
		if (si->swap_map[offset])
			continue;
	got_page:
		/* Shared tail: also entered by goto from the two searches above. */
		if (offset == si->lowest_bit)
			si->lowest_bit++;
		if (offset == si->highest_bit)
			si->highest_bit--;
		si->swap_map[offset] = count;
		nr_swap_pages--;
		si->cluster_next = offset+1;
		return offset;
	}
	return 0;
}

/*
 * Allocate one swap slot with an initial map count of `count`.
 *
 * Returns the new swap entry, or an entry whose .val is 0 when nothing
 * could be allocated (bad count, no swap area on the list, no free
 * pages, or every writable area is full).
 *
 * The search starts at swap_list.next.  Areas whose flags do not have
 * all SWP_WRITEOK bits set are skipped.  After a successful allocation
 * swap_list.next is moved to the following area if it has the same
 * priority, otherwise back to swap_list.head.
 */
swp_entry_t __get_swap_page(unsigned short count)
{
	struct swap_info_struct * p;
	unsigned long offset;
	swp_entry_t entry;
	int type, wrapped = 0;

	entry.val = 0;	/* Out of memory */
	if (count >= SWAP_MAP_MAX)
		goto bad_count;
	swap_list_lock();
	type = swap_list.next;
	if (type < 0)
		goto out;
	if (nr_swap_pages == 0)
		goto out;

	while (1) {
		p = &swap_info[type];
		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
			swap_device_lock(p);
			offset = scan_swap_map(p, count);
			swap_device_unlock(p);
			if (offset) {
				entry = SWP_ENTRY(type,offset);
				/* Decide where the next allocation should start. */
				type = swap_info[type].next;
				if (type < 0 ||
					p->prio != swap_info[type].prio) {
						swap_list.next = swap_list.head;
				} else {
					swap_list.next = type;
				}
				goto out;
			}
		}
		type = p->next;
		if (!wrapped) {
			/*
			 * End of list or of the current priority group:
			 * restart once from the head of the list.
			 */
			if (type < 0 || p->prio != swap_info[type].prio) {
				type = swap_list.head;
				wrapped = 1;
			}
		} else
			if (type < 0)
				goto out;	/* out of swap space */
	}
out:
	swap_list_unlock();
	return entry;

bad_count:
	printk(KERN_ERR "get_swap_page: bad count %hd from %p\n",
	       count, __builtin_return_address(0));
	/*
	 * This path is taken before swap_list_lock(), so it must not go
	 * through "out", which would release a lock that is not held.
	 */
	return entry;
}


/*
 * Drop `count` references from the swap map slot named by `entry`.
 *
 * Caller has made sure that the swapdevice corresponding to entry
 * is still around or has not been recycled.
 *
 * A zero entry is ignored silently.  An out-of-range type, an area
 * without SWP_USED, an out-of-range offset or an already-free slot is
 * reported with printk() and otherwise ignored.  Slots whose count is
 * not below SWAP_MAP_MAX are left untouched.  When a slot's count drops
 * to zero the area's lowest_bit/highest_bit window is widened to cover
 * it and nr_swap_pages is incremented.
 */
void __swap_free(swp_entry_t entry, unsigned short count)
{
	struct swap_info_struct *si;
	unsigned long slot, dev;

	if (!entry.val)
		return;

	dev = SWP_TYPE(entry);
	if (dev >= nr_swapfiles) {
		printk("swap_free: Trying to free nonexistent swap-page\n");
		return;
	}

	si = &swap_info[dev];
	if (!(si->flags & SWP_USED)) {
		printk("swap_free: Trying to free swap from unused swap-device\n");
		return;
	}

	slot = SWP_OFFSET(entry);
	if (slot >= si->max) {
		printk("swap_free: offset exceeds max\n");
		return;
	}

	if (si->swap_map[slot] == 0) {
		printk("VM: Bad swap entry %08lx\n", entry.val);
		return;
	}

	swap_list_lock();
	/* Let the next allocation start at this area if it outranks the current pick. */
	if (si->prio > swap_info[swap_list.next].prio)
		swap_list.next = dev;

	swap_device_lock(si);
	if (si->swap_map[slot] < SWAP_MAP_MAX) {
		if (si->swap_map[slot] < count) {
			/* Both locks are dropped before the complaint is printed. */
			swap_device_unlock(si);
			swap_list_unlock();
			printk(KERN_ERR "VM: Bad count %hd current count %hd\n", count, si->swap_map[slot]);
			return;
		}

		si->swap_map[slot] -= count;
		if (si->swap_map[slot] == 0) {
			/* Slot became free: make sure the search window covers it. */
			if (slot < si->lowest_bit)
				si->lowest_bit = slot;
			if (slot > si->highest_bit)
				si->highest_bit = slot;
			nr_swap_pages++;
		}
	}
	swap_device_unlock(si);
	swap_list_unlock();
}

/*
 * Examine one page-table entry on behalf of try_to_unuse().
 *
 * The swap entry has already been read into `page` by the caller.
 *  - Empty pte: nothing to do.
 *  - Present pte mapping `page`: it is marked dirty, because the swap
 *    cache copy is about to go away.
 *  - Non-present pte holding exactly `entry`: it is replaced by a dirty
 *    mapping of `page` built from vma->vm_page_prot, one swap reference
 *    is dropped, one page reference is taken and the mm's rss is bumped.
 *
 * We don't know just how many PTEs will share this swap entry, so a
 * later write fault is left to sort out sharing.
 * NOTE(review): an earlier version of this comment said the pte is
 * always made "nowrite"; nothing here write-protects explicitly --
 * confirm what vm_page_prot guarantees.
 *
 * `address` is not used by this function.
 */
static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *dir, swp_entry_t entry, struct page* page)
{
	pte_t old = *dir;

	if (pte_none(old))
		return;

	if (!pte_present(old)) {
		/* Swapped-out pte: only act if it names our entry. */
		if (pte_to_swp_entry(old).val == entry.val) {
			set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
			swap_free(entry);
			get_page(page);
			++vma->vm_mm->rss;
		}
		return;
	}

	/*
	 * If this entry is swap-cached, then page must already hold the
	 * right address for any copies in physical memory.  We will be
	 * removing the swap cache in a moment, so mark the pte dirty.
	 */
	if (pte_page(old) == page)
		ptep_mkdirty(dir);
}

/*
 * Run unuse_pte() over every pte under one pmd entry that falls inside
 * [address, address + size), clamped to the span of this pmd.
 *
 * An empty pmd is skipped; a bad one is reported, cleared and skipped.
 * `offset` is the base the caller computed for this pgd; the pmd-level
 * base of `address` is added to it before the per-pte walk.
 */
static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
	unsigned long address, unsigned long size, unsigned long offset,
	swp_entry_t entry, struct page* page)
{
	unsigned long limit;
	pte_t *ptep;

	if (pmd_none(*dir))
		return;
	if (pmd_bad(*dir)) {
		pmd_ERROR(*dir);
		pmd_clear(dir);
		return;
	}

	ptep = pte_offset(dir, address);
	offset += address & PMD_MASK;
	address &= ~PMD_MASK;	/* from here on: offset within this pmd */

	limit = address + size;
	if (limit > PMD_SIZE)
		limit = PMD_SIZE;

	for (;;) {
		unuse_pte(vma, offset+address-vma->vm_start, ptep, entry, page);
		ptep++;
		address += PAGE_SIZE;
		/* Stop at the limit, or if the address wrapped to zero. */
		if (!address || address >= limit)
			break;
	}
}

/*
 * Run unuse_pmd() over every pmd under one pgd entry that falls inside
 * [address, address + size), clamped to the span of this pgd.
 *
 * An empty pgd is skipped; a bad one is reported, cleared and skipped.
 * An empty range after clamping is treated as a kernel bug.
 */
static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
	unsigned long address, unsigned long size,
	swp_entry_t entry, struct page* page)
{
	unsigned long base, limit;
	pmd_t *pmdp;

	if (pgd_none(*dir))
		return;
	if (pgd_bad(*dir)) {
		pgd_ERROR(*dir);
		pgd_clear(dir);
		return;
	}

	pmdp = pmd_offset(dir, address);
	base = address & PGDIR_MASK;
	address &= ~PGDIR_MASK;	/* from here on: offset within this pgd */

	limit = address + size;
	if (limit > PGDIR_SIZE)
		limit = PGDIR_SIZE;
	if (address >= limit)
		BUG();

	for (;;) {
		unuse_pmd(vma, pmdp, address, limit - address, base, entry,
			  page);
		pmdp++;
		address = (address + PMD_SIZE) & PMD_MASK;
		/* Stop at the limit, or if the address wrapped to zero. */
		if (!address || address >= limit)
			break;
	}
}

/*
 * Run unuse_pgd() over every pgd entry covering [vma->vm_start,
 * vma->vm_end).  `pgdir` must point at the pgd entry for vm_start.
 * An empty or inverted range is treated as a kernel bug.
 */
static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
			swp_entry_t entry, struct page* page)
{
	unsigned long addr = vma->vm_start;
	unsigned long stop = vma->vm_end;

	if (addr >= stop)
		BUG();

	for (;;) {
		unuse_pgd(vma, pgdir, addr, stop - addr, entry, page);
		pgdir++;
		addr = (addr + PGDIR_SIZE) & PGDIR_MASK;
		/* Stop at the end of the vma, or if the address wrapped to zero. */
		if (!addr || addr >= stop)
			break;
	}
}

/*
 * Apply unuse_vma() to every vma of `mm`, holding mm->page_table_lock
 * for the whole walk.  A NULL mm is ignored.
 */
static void unuse_process(struct mm_struct * mm,
			swp_entry_t entry, struct page* page)
{
	struct vm_area_struct *area;

	if (!mm)
		return;

	spin_lock(&mm->page_table_lock);
	area = mm->mmap;
	while (area) {
		/* Start each vma at the pgd entry covering its first address. */
		unuse_vma(area, pgd_offset(mm, area->vm_start), entry, page);
		area = area->vm_next;
	}
	spin_unlock(&mm->page_table_lock);
}

/*
 * Bring every in-use slot of swap area `type` back into memory so that
 * no page table or shmem object refers to the area any more.
 *
 * We completely avoid races by reading each swap page in advance,
 * and then search for the process using it.  All the necessary
 * page table adjustments can then be made atomically.
 *
 * Returns 0 once the scan finds no in-use slot, or -ENOMEM when
 * read_swap_cache() cannot supply a page for a slot.
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct task_struct *p;
	struct page *page;
	swp_entry_t entry;
	/* NOTE(review): signed index compared against si->max below -- confirm max's type. */
	int i;

	while (1) {
		/*
		 * Find a swap page in use and read it in.
		 * The scan restarts at slot 1 on every pass; slot 0 is
		 * never examined.
		 */
		swap_device_lock(si);
		for (i = 1; i < si->max ; i++) {
			if (si->swap_map[i] > 0 && si->swap_map[i] != SWAP_MAP_BAD) {
				/*
				 * Prevent swaphandle from being completely
				 * unused by swap_free while we are trying
				 * to read in the page - this prevents warning
				 * messages from rw_swap_page_base.
				 */
				if (si->swap_map[i] != SWAP_MAP_MAX)
					si->swap_map[i]++;
				swap_device_unlock(si);
				goto found_entry;
			}
		}
		/* Nothing left in use: done. */
		swap_device_unlock(si);
		break;

	found_entry:
		entry = SWP_ENTRY(type, i);

		/* Get a page for the entry, using the existing swap
		 * cache page if there is one.  Otherwise, get a clean
		 * page and read the swap into it. */
		page = read_swap_cache(entry);
		if (!page) {
			/* Undo the extra reference taken during the scan. */
			swap_free(entry);
  			return -ENOMEM;
		}
		if (PageSwapCache(page))
			delete_from_swap_cache(page);
		/* Fix up the page tables of every task while the task list is held for reading. */
		read_lock(&tasklist_lock);
		for_each_task(p)
			unuse_process(p->mm, entry, page);
		read_unlock(&tasklist_lock);
		shmem_unuse(entry, page);
		/* Now get rid of the extra reference to the temporary
		 * page we've been using. */
		page_cache_release(page);
		/*
		 * Drop the extra reference taken during the scan, then
		 * check for and clear any overflowed swap map counts.
		 */
		swap_free(entry);
		swap_list_lock();
		swap_device_lock(si);
		if (si->swap_map[i] > 0) {
			/* A leftover count other than SWAP_MAP_MAX is unexpected: report it. */
			if (si->swap_map[i] != SWAP_MAP_MAX)
				printk("VM: Undead swap entry %08lx\n", 
								entry.val);
			/* Force the slot free and account for it. */
			nr_swap_pages++;
			si->swap_map[i] = 0;
		}
		swap_device_unlock(si);
		swap_list_unlock();
	}
	return 0;
}

/*
 * swapoff system call: stop using the swap area named by `specialfile`.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, the error from
 * user_path_walk() if the path cannot be resolved, -EINVAL if no
 * writable swap area matches the path, or the error from
 * try_to_unuse() (in which case the area is put back on the list).
 */
asmlinkage long sys_swapoff(const char * specialfile)
{
	struct swap_info_struct * p = NULL;
	struct nameidata nd;
	int i, type, prev;
	int err;
	
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = user_path_walk(specialfile, &nd);
	if (err)
		goto out;

	lock_kernel();
	prev = -1;
	swap_list_lock();
	/*
	 * Walk the list looking for a writable area that matches the
	 * path: by dentry when the area has a swap_file, otherwise by
	 * device number when the path is a block device.  `prev` trails
	 * one entry behind so the match can be unlinked.
	 */
	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
		p = swap_info + type;
		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
			if (p->swap_file) {
				if (p->swap_file == nd.dentry)
				  break;
			} else {
				if (S_ISBLK(nd.dentry->d_inode->i_mode)
				    && (p->swap_device == nd.dentry->d_inode->i_rdev))
				  break;
			}
		}
		prev = type;
	}
	err = -EINVAL;
	if (type < 0) {
		swap_list_unlock();
		goto out_dput;
	}

	/* Unlink the area from the list. */
	if (prev < 0) {
		swap_list.head = p->next;
	} else {
		swap_info[prev].next = p->next;
	}
	if (type == swap_list.next) {
		/* just pick something that's safe... */
		swap_list.next = swap_list.head;
	}
	nr_swap_pages -= p->pages;
	swap_list_unlock();
	/*
	 * Keep the area marked in use but no longer write-ok, so that
	 * __get_swap_page() skips it while its pages are pulled back in.
	 */
	p->flags = SWP_USED;
	err = try_to_unuse(type);
	if (err) {
		/* re-insert swap space back into swap_list */
		swap_list_lock();
		/* Insert before the first entry whose priority is not higher than ours. */
		for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next)
			if (p->prio >= swap_info[i].prio)
				break;
		p->next = i;
		if (prev < 0)
			swap_list.head = swap_list.next = p - swap_info;
		else
			swap_info[prev].next = p - swap_info;
		nr_swap_pages += p->pages;
		swap_list_unlock();
		p->flags = SWP_WRITEOK;
		goto out_dput;
	}
	if (p->swap_device)
		blkdev_put(nd.dentry->d_inode->i_bdev, BDEV_SWAP);
	/* Drop the reference obtained by the path lookup above. */
	path_release(&nd);

	/*
	 * Move the area's own dentry/vfsmount into nd so that the
	 * path_release() at out_dput drops them too (presumably the
	 * references taken when the area was enabled -- confirm against
	 * sys_swapon), then tear down the rest of the area's state.
	 */
	nd.dentry = p->swap_file;
	p->swap_file = NULL;
	nd.mnt = p->swap_vfsmnt;
	p->swap_vfsmnt = NULL;
	p->swap_device = 0;
	vfree(p->swap_map);
	p->swap_map = NULL;
	p->flags = 0;
	err = 0;

out_dput:
	unlock_kernel();
	path_release(&nd);
out:
	return err;
}

int get_swaparea_info(char *buf)
{
	char * page = (char *) __get_free_page(GFP_KERNEL);
	struct swap_info_struct *ptr = swap_info;
	int i, j, len = 0, usedswap;
12 下一页
💿 文件大小 53 K
👤 上传用户 hndsgzyb
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#linux #内存管理 #源代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -