/*
* linux/mm/vmscan.c
*
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*
* Swap reorganised 29.12.95, Stephen Tweedie.
* kswapd added: 7.1.96 sct
* Removed kswapd_ctl limits, and swap out as many pages as needed
* to bring the system back to freepages.high: 2.4.97, Rik van Riel.
* Version: $Id: vmscan.c,v 1.5 1998/02/23 22:14:28 sct Exp $
* Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
* Multiqueue VM started 5.8.00, Rik van Riel.
*/
#include <linux/slab.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <asm/pgalloc.h>
/*
* The swap-out functions return 1 if they successfully
* threw something out and we got a free page. They return
* zero if they couldn't do anything, and any other value
* indicates that rss was decreased but the page was shared.
*
* NOTE! If it sleeps, it *must* return 1 to make sure we
* don't continue with the swap-out. Otherwise we may be
* using a process that no longer actually exists (it might
* have died while we slept).
*/
static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
{
pte_t pte;
swp_entry_t entry;
struct page * page;
int onlist;
pte = *page_table;
if (!pte_present(pte))
goto out_failed;
page = pte_page(pte);
if ((!VALID_PAGE(page)) || PageReserved(page))
goto out_failed;
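/*
* mm->swap_cnt is this mm's remaining budget of ptes to examine in
* the current pass (set up in swap_out() below); once it reaches
* zero we return 1, which ends the page-table walk of this mm for
* this pass.
*/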
if (!mm->swap_cnt)
return 1;
mm->swap_cnt--;
onlist = PageActive(page);
/* Don't look at this pte if it's been accessed recently. */
if (ptep_test_and_clear_young(page_table)) {
age_page_up(page);
goto out_failed;
}
if (!onlist)
/* The page is still mapped, so it can't be freeable... */
age_page_down_ageonly(page);
/*
* If the page is in active use by us, or if the page
* is in active use by others, don't unmap it or
* (worse) start unneeded IO.
*/
if (page->age > 0)
goto out_failed;
if (TryLockPage(page))
goto out_failed;
/* From this point on, the odds are that we're going to
* nuke this pte, so read and clear the pte. This hook
* is needed on CPUs which update the accessed and dirty
* bits in hardware.
*/
pte = ptep_get_and_clear(page_table);
flush_tlb_page(vma, address);
/*
* Is the page already in the swap cache? If so, then
* we can just drop our reference to it without doing
* any IO - it's already up-to-date on disk.
*
* Return 0, as we didn't actually free any real
* memory, and we should just continue our scan.
*/
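/*
* Note: the set_swap_pte, drop_pte and out_failed labels below sit
* inside this block, but they are shared exit paths reached by goto
* from elsewhere in this function.
*/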
if (PageSwapCache(page)) {
entry.val = page->index;
if (pte_dirty(pte))
set_page_dirty(page);
set_swap_pte:
swap_duplicate(entry);
set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
UnlockPage(page);
mm->rss--;
deactivate_page(page);
page_cache_release(page);
out_failed:
return 0;
}
/*
* Is it a clean page? Then it must be recoverable
* by just paging it in again, and we can just drop
* it..
*
* However, this won't actually free any real
* memory, as the page will just be in the page cache
* somewhere, and as such we should just continue
* our scan.
*
* Basically, this just makes it possible for us to do
* some real work in the future in "refill_inactive()".
*/
flush_cache_page(vma, address);
if (!pte_dirty(pte))
goto drop_pte;
/*
* Ok, it's really dirty. That means that
* we should either create a new swap cache
* entry for it, or we should write it back
* to its own backing store.
*/
if (page->mapping) {
set_page_dirty(page);
goto drop_pte;
}
/*
* This is a dirty, swappable page. First of all,
* get a suitable swap entry for it, and make sure
* we have the swap cache set up to associate the
* page with that swap entry.
*/
entry = get_swap_page();
if (!entry.val)
goto out_unlock_restore; /* No swap space left */
/* Add it to the swap cache and mark it dirty */
add_to_swap_cache(page, entry);
set_page_dirty(page);
goto set_swap_pte;
out_unlock_restore:
set_pte(page_table, pte);
UnlockPage(page);
return 0;
}
/*
* A new implementation of swap_out(). We do not swap out complete
* processes, but only a small number of pages at a time before moving
* on to the next process. How many pages each process contributes is
* based on the number of page faults it has taken recently, so we
* won't keep swapping out heavily used processes ...
*
* Note: the priority argument is a hint about how much CPU to spend
* on the swap-out search, not about how many pages to swap out of
* each process.
*
* (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
*/
static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
{
pte_t * pte;
unsigned long pmd_end;
if (pmd_none(*dir))
return 0;
if (pmd_bad(*dir)) {
pmd_ERROR(*dir);
pmd_clear(dir);
return 0;
}
pte = pte_offset(dir, address);
pmd_end = (address + PMD_SIZE) & PMD_MASK;
if (end > pmd_end)
end = pmd_end;
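/*
* Scan each pte in [address, end); mm->swap_address records the
* resume point, so the next pass over this mm continues where this
* one stopped.
*/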
do {
int result;
mm->swap_address = address + PAGE_SIZE;
result = try_to_swap_out(mm, vma, address, pte, gfp_mask);
if (result)
return result;
address += PAGE_SIZE;
pte++;
} while (address && (address < end));
return 0;
}
static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
{
pmd_t * pmd;
unsigned long pgd_end;
if (pgd_none(*dir))
return 0;
if (pgd_bad(*dir)) {
pgd_ERROR(*dir);
pgd_clear(dir);
return 0;
}
pmd = pmd_offset(dir, address);
pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
if (pgd_end && (end > pgd_end))
end = pgd_end;
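/* Walk the pmds covering [address, end), one PMD_SIZE step at a time. */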
do {
int result = swap_out_pmd(mm, vma, pmd, address, end, gfp_mask);
if (result)
return result;
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address && (address < end));
return 0;
}
static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int gfp_mask)
{
pgd_t *pgdir;
unsigned long end;
/* Don't swap out areas which are locked down */
if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
return 0;
pgdir = pgd_offset(mm, address);
end = vma->vm_end;
if (address >= end)
BUG();
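/* Walk this vma one pgd entry at a time, handing each range to swap_out_pgd(). */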
do {
int result = swap_out_pgd(mm, vma, pgdir, address, end, gfp_mask);
if (result)
return result;
address = (address + PGDIR_SIZE) & PGDIR_MASK;
pgdir++;
} while (address && (address < end));
return 0;
}
static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
{
int result = 0;
unsigned long address;
struct vm_area_struct* vma;
/*
* Go through the process's page directory: find the proper
* vm-area after freezing the vma chain and the ptes by taking
* the page_table_lock.
*/
spin_lock(&mm->page_table_lock);
address = mm->swap_address;
vma = find_vma(mm, address);
if (vma) {
if (address < vma->vm_start)
address = vma->vm_start;
for (;;) {
result = swap_out_vma(mm, vma, address, gfp_mask);
if (result)
goto out_unlock;
vma = vma->vm_next;
if (!vma)
break;
address = vma->vm_start;
}
}
/* Reset to 0 when we reach the end of address space */
mm->swap_address = 0;
mm->swap_cnt = 0;
out_unlock:
spin_unlock(&mm->page_table_lock);
return result;
}
/*
* Select the task with maximal swap_cnt and try to swap out a page.
* N.B. This function returns only 0 or 1. Return values != 1 from
* the lower level routines result in continued processing.
*/
#define SWAP_SHIFT 5
#define SWAP_MIN 8
static int swap_out(unsigned int priority, int gfp_mask)
{
int counter;
int __ret = 0;
/*
* We make one or two passes through the task list, indexed by
* assign = {0, 1}:
* Pass 1: select the swappable task with maximal RSS that has
* not yet been swapped out.
* Pass 2: re-assign the swap_cnt values from rss, then select as above.
*
* With this approach, there's no need to remember the last task
* swapped out. If the swap-out fails, we clear swap_cnt so the
* task won't be selected again until all others have been tried.
*
* Think of swap_cnt as a "shadow rss" - it tells us which process
* we want to page out (always try largest first).
*/
counter = (nr_threads << SWAP_SHIFT) >> priority;
if (counter < 1)
counter = 1;
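/*
* counter limits how many candidate mms we are willing to examine:
* it scales with the number of threads and shrinks as the priority
* value grows (in this kernel a larger priority value means a less
* urgent scan).
*/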
for (; counter >= 0; counter--) {
struct list_head *p;
unsigned long max_cnt = 0;
struct mm_struct *best = NULL;
int assign = 0;
int found_task = 0;
select:
spin_lock(&mmlist_lock);
p = init_mm.mmlist.next;
for (; p != &init_mm.mmlist; p = p->next) {
struct mm_struct *mm = list_entry(p, struct mm_struct, mmlist);
if (mm->rss <= 0)
continue;
found_task++;
/* Refresh swap_cnt? */
if (assign == 1) {
mm->swap_cnt = (mm->rss >> SWAP_SHIFT);
if (mm->swap_cnt < SWAP_MIN)
mm->swap_cnt = SWAP_MIN;
}
if (mm->swap_cnt > max_cnt) {
max_cnt = mm->swap_cnt;
best = mm;
}
}
/* Make sure it doesn't disappear */
if (best)
atomic_inc(&best->mm_users);
spin_unlock(&mmlist_lock);
/*
* We have dropped the mmlist_lock, but "best" cannot
* go away under us: we took a reference on it above
* (mm_users incremented), and that reference is only
* dropped by the mmput() below.
*/
if (!best) {
if (!assign && found_task > 0) {
assign = 1;
goto select;
}
break;
} else {
__ret = swap_out_mm(best, gfp_mask);
mmput(best);
break;
}
}
return __ret;
}
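/*
* Illustrative sketch only (not part of this file): callers such as
* the inactive-list refill path typically drive swap_out() with a
* falling priority until it reports progress, roughly:
*
*	int priority = 6;
*	while (priority >= 0 && !swap_out(priority, gfp_mask))
*		priority--;
*
* The real caller and its loop shape live elsewhere in mm/ and may
* differ from this sketch.
*/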
/**
* reclaim_page - reclaims one page from the inactive_clean list
* @zone: reclaim a page from this zone
*
* The pages on the inactive_clean list can be instantly reclaimed.
* The tests look impressive, but most of the time we'll grab