📄 vmscan.c
/*
 *  linux/mm/vmscan.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95, Stephen Tweedie.
 *  kswapd added: 7.1.96  sct
 *  Removed kswapd_ctl limits, and swap out as many pages as needed
 *  to bring the system back to freepages.high: 2.4.97, Rik van Riel.
 *  Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
 *  Multiqueue VM started 5.8.00, Rik van Riel.
 */

#include <linux/slab.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/compiler.h>

#include <asm/pgalloc.h>

/*
 * The "priority" of VM scanning is how much of the queues we
 * will scan in one go. A value of 6 for DEF_PRIORITY implies
 * that we'll scan 1/64th of the queues ("queue_length >> 6")
 * during a normal aging round.
 */
#define DEF_PRIORITY (6)

/*
 * The swap-out function returns 1 if it successfully
 * scanned all the pages it was asked to (`count').
 * It returns zero if it couldn't do anything,
 *
 * rss may decrease because pages are shared, but this
 * doesn't count as having freed a page.
 */

/* mm->page_table_lock is held. mmap_sem is not held */
static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone)
{
	pte_t pte;
	swp_entry_t entry;

	/* Don't look at this pte if it's been accessed recently. */
	if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) {
		mark_page_accessed(page);
		return 0;
	}

	/* Don't bother unmapping pages that are active */
	if (PageActive(page))
		return 0;

	/* Don't bother replenishing zones not under pressure.. */
	if (!memclass(page->zone, classzone))
		return 0;

	if (TryLockPage(page))
		return 0;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	flush_cache_page(vma, address);
	pte = ptep_get_and_clear(page_table);
	flush_tlb_page(vma, address);

	if (pte_dirty(pte))
		set_page_dirty(page);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		swap_duplicate(entry);
set_swap_pte:
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		mm->rss--;
		UnlockPage(page);
		memc_clear(vma->vm_mm, page);
		{
			int freeable = page_count(page) - !!page->buffers <= 2;
			page_cache_release(page);
			return freeable;
		}
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..  or if it's dirty but has backing store,
	 * just mark the page dirty and drop it.
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	if (page->mapping)
		goto drop_pte;
	if (!PageDirty(page))
		goto drop_pte;

	/*
	 * Anonymous buffercache pages can be left behind by
	 * concurrent truncate and pagefault.
	 */
	if (page->buffers)
		goto preserve;

	/*
	 * This is a dirty, swappable page.  First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	for (;;) {
		entry = get_swap_page();
		if (!entry.val)
			break;
		/* Add it to the swap cache and mark it dirty
		 * (adding to the page cache will clear the dirty
		 * and uptodate bits, so we need to do it again)
		 */
		if (add_to_swap_cache(page, entry) == 0) {
			SetPageUptodate(page);
			set_page_dirty(page);
			goto set_swap_pte;
		}
		/* Raced with "speculative" read_swap_cache_async */
		swap_free(entry);
	}

	/* No swap space left */
preserve:
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}

/* mm->page_table_lock is held. mmap_sem is not held */
static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone)
{
	pte_t * pte;
	unsigned long pmd_end;

	if (pmd_none(*dir))
		return count;
	if (pmd_bad(*dir)) {
		pmd_ERROR(*dir);
		pmd_clear(dir);
		return count;
	}

	pte = pte_offset(dir, address);

	pmd_end = (address + PMD_SIZE) & PMD_MASK;
	if (end > pmd_end)
		end = pmd_end;

	do {
		if (pte_present(*pte)) {
			struct page *page = pte_page(*pte);

			if (VALID_PAGE(page) && !PageReserved(page)) {
				count -= try_to_swap_out(mm, vma, address, pte, page, classzone);
				if (!count) {
					address += PAGE_SIZE;
					break;
				}
			}
		}
		address += PAGE_SIZE;
		pte++;
	} while (address && (address < end));
	mm->swap_address = address;
	return count;
}

/* mm->page_table_lock is held. mmap_sem is not held */
static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone)
{
	pmd_t * pmd;
	unsigned long pgd_end;

	if (pgd_none(*dir))
		return count;
	if (pgd_bad(*dir)) {
		pgd_ERROR(*dir);
		pgd_clear(dir);
		return count;
	}

	pmd = pmd_offset(dir, address);

	pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
	if (pgd_end && (end > pgd_end))
		end = pgd_end;

	do {
		count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone);
		if (!count)
			break;
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address && (address < end));
	return count;
}

/* mm->page_table_lock is held. mmap_sem is not held */
static inline int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count, zone_t * classzone)
{
	pgd_t *pgdir;
	unsigned long end;

	/* Don't swap out areas which are reserved */
	if (vma->vm_flags & VM_RESERVED)
		return count;

	pgdir = pgd_offset(mm, address);

	end = vma->vm_end;
	if (address >= end)
		BUG();
	do {
		count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone);
		if (!count)
			break;
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		pgdir++;
	} while (address && (address < end));
	return count;
}

/* Placeholder for swap_out(): may be updated by fork.c:mmput() */
struct mm_struct *swap_mm = &init_mm;

/*
 * Returns remaining count of pages to be swapped out by followup call.
 */
static inline int swap_out_mm(struct mm_struct * mm, int count, int * mmcounter, zone_t * classzone)
{
	unsigned long address;
	struct vm_area_struct* vma;

	/*
	 * Find the proper vm-area after freezing the vma chain
	 * and ptes.
	 */
	spin_lock(&mm->page_table_lock);
	address = mm->swap_address;
	if (address == TASK_SIZE || swap_mm != mm) {
		/* We raced: don't count this mm but try again */
		++*mmcounter;
		goto out_unlock;
	}
	vma = find_vma(mm, address);
	if (vma) {
		if (address < vma->vm_start)
			address = vma->vm_start;

		for (;;) {
			count = swap_out_vma(mm, vma, address, count, classzone);
			vma = vma->vm_next;
			if (!vma)
				break;
			if (!count)
				goto out_unlock;
			address = vma->vm_start;
		}
	}
	/* Indicate that we reached the end of address space */
	mm->swap_address = TASK_SIZE;

out_unlock:
	spin_unlock(&mm->page_table_lock);
	return count;
}

static int FASTCALL(swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone));
static int swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone)
{
	int counter, nr_pages = SWAP_CLUSTER_MAX;
	struct mm_struct *mm;

	counter = mmlist_nr;
	do {
		if (unlikely(current->need_resched)) {
			__set_current_state(TASK_RUNNING);
			schedule();
		}

		spin_lock(&mmlist_lock);
		mm = swap_mm;
		while (mm->swap_address == TASK_SIZE || mm == &init_mm) {
			mm->swap_address = 0;
			mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
			if (mm == swap_mm)
				goto empty;
			swap_mm = mm;
		}

		/* Make sure the mm doesn't disappear when we drop the lock.. */
		atomic_inc(&mm->mm_users);
		spin_unlock(&mmlist_lock);

		nr_pages = swap_out_mm(mm, nr_pages, &counter, classzone);

		mmput(mm);

		if (!nr_pages)
			return 1;
	} while (--counter >= 0);

	return 0;

empty:
	spin_unlock(&mmlist_lock);
	return 0;
}

static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority));
static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority)
{
	struct list_head * entry;
	int max_scan = nr_inactive_pages / priority;
	int max_mapped = min((nr_pages << (10 - priority)), max_scan / 10);

	spin_lock(&pagemap_lru_lock);
	while (--max_scan >= 0 && (entry = inactive_list.prev) != &inactive_list) {
		struct page * page;

		if (unlikely(current->need_resched)) {
			spin_unlock(&pagemap_lru_lock);
			__set_current_state(TASK_RUNNING);
			schedule();
			spin_lock(&pagemap_lru_lock);
			continue;
		}

		page = list_entry(entry, struct page, lru);

		if (unlikely(!PageLRU(page)))
			BUG();
		if (unlikely(PageActive(page)))
			BUG();

		list_del(entry);
		list_add(entry, &inactive_list);

		/*
		 * Zero page counts can happen because we unlink the pages
		 * _after_ decrementing the usage count..
		 */
		if (unlikely(!page_count(page)))
			continue;

		if (!memclass(page->zone, classzone))
			continue;

		/* Racy check to avoid trylocking when not worthwhile */
		if (!page->buffers && (page_count(page) != 1 || !page->mapping))
			goto page_mapped;

		/*
		 * The page is locked. IO in progress?
		 * Move it to the back of the list.
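
The swap_out_mm()/swap_out_vma()/swap_out_pgd()/swap_out_pmd() chain above is a resumable, budgeted walk: each call spends at most `count` pages of work and records where it stopped in mm->swap_address, so the next round resumes there instead of rescanning the whole address space. The user-space sketch below illustrates only that resume-with-budget pattern; it is not kernel code, and the names (scan_state, try_reclaim, resumable_scan) are invented for the example.

#include <stdio.h>

/* Hypothetical stand-in for the kernel's state: 'cursor' plays the role of
 * mm->swap_address and 'budget' the role of the 'count' argument that is
 * threaded through swap_out_pgd()/swap_out_pmd(). */
struct scan_state {
	unsigned long cursor;   /* next index to examine */
	unsigned long end;      /* one past the last index (think TASK_SIZE) */
};

/* Pretend "try to reclaim item i"; here every third item succeeds. */
static int try_reclaim(unsigned long i)
{
	return (i % 3) == 0;
}

/* Scan until at most 'budget' reclaims have been done, remembering where we
 * stopped.  Returns the unused part of the budget, the way swap_out_mm()
 * returns the remaining count. */
static int resumable_scan(struct scan_state *st, int budget)
{
	while (st->cursor < st->end && budget > 0) {
		if (try_reclaim(st->cursor))
			budget--;
		st->cursor++;
	}
	return budget;
}

int main(void)
{
	struct scan_state st = { .cursor = 0, .end = 32 };
	int pass = 0;

	/* Repeated calls pick up where the previous one left off, the same
	 * way successive swap_out() rounds resume from mm->swap_address. */
	while (st.cursor < st.end) {
		int left = resumable_scan(&st, 4);
		printf("pass %d: stopped at %lu, budget left %d\n",
		       ++pass, st.cursor, left);
	}
	return 0;
}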
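
The shrink_cache() loop (cut off above) takes the oldest entry from the tail of inactive_list (inactive_list.prev) and, via list_del() followed by list_add(), rotates it to the head so the next iteration examines a different page. The following is a minimal stand-alone sketch of that rotation only, using a hand-rolled circular doubly linked list in place of the kernel's list_head; struct node, its id field and list_init() are invented for the example.

#include <stdio.h>

/* Tiny circular doubly linked list, mimicking list_head semantics. */
struct node {
	struct node *next, *prev;
	int id;                         /* stands in for a struct page */
};

static void list_init(struct node *head)
{
	head->next = head->prev = head;
}

static void list_del(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

/* Insert right after 'head', i.e. at the head of the list. */
static void list_add(struct node *n, struct node *head)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

int main(void)
{
	struct node head, pages[4];
	int i;

	list_init(&head);
	for (i = 0; i < 4; i++) {
		pages[i].id = i;
		list_add(&pages[i], &head);     /* list is now 3,2,1,0 */
	}

	/* Like shrink_cache(): look at the tail (oldest) entry and, when the
	 * page is being kept, rotate it to the head so the next iteration
	 * sees the next-oldest page instead of the same one again. */
	for (i = 0; i < 3; i++) {
		struct node *oldest = head.prev;
		printf("examining page %d\n", oldest->id);
		list_del(oldest);
		list_add(oldest, &head);
	}
	return 0;
}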