📄 rmap.c
/*
 * mm/rmap.c - physical to virtual reverse mappings
 *
 * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
 * Released under the General Public License (GPL).
 *
 * Simple, low overhead reverse mapping scheme.
 * Please try to keep this thing as modular as possible.
 *
 * Provides methods for unmapping each kind of mapped page:
 * the anon methods track anonymous pages, and
 * the file methods track pages belonging to an inode.
 *
 * Original design by Rik van Riel <riel@conectiva.com.br> 2001
 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
 * Contributions by Hugh Dickins <hugh@veritas.com> 2003, 2004
 */

/*
 * Lock ordering in mm:
 *
 * inode->i_mutex	(while writing or truncating, not reading or faulting)
 *   inode->i_alloc_sem (vmtruncate_range)
 *   mm->mmap_sem
 *     page->flags PG_locked (lock_page)
 *       mapping->i_mmap_lock
 *         anon_vma->lock
 *           mm->page_table_lock or pte_lock
 *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
 *             swap_lock (in swap_duplicate, swap_info_get)
 *               mmlist_lock (in mmput, drain_mmlist and others)
 *               mapping->private_lock (in __set_page_dirty_buffers)
 *               inode_lock (in set_page_dirty's __mark_inode_dirty)
 *                 sb_lock (within inode_lock in fs/fs-writeback.c)
 *                 mapping->tree_lock (widely used, in set_page_dirty,
 *                           in arch-dependent flush_dcache_mmap_lock,
 *                           within inode_lock in __sync_single_inode)
 */
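
/*
 * Illustrative sketch (not part of the original rmap.c): the hierarchy
 * above means that a path needing several of these locks must take them
 * top-down.  Assuming an anonymous page the caller has already locked,
 * and the usual local variables (mm, pmd, addr, ptl) in scope, the
 * reclaim-style nesting looks roughly like:
 *
 *	lock_page(page);				(page->flags PG_locked)
 *	spin_lock(&anon_vma->lock);			(nests inside PG_locked)
 *	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);	(pte lock nests inside that)
 *	...
 *	pte_unmap_unlock(pte, ptl);
 *	spin_unlock(&anon_vma->lock);
 *	unlock_page(page);
 */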

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rmap.h>
#include <linux/rcupdate.h>
#include <linux/module.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>

#include <asm/tlbflush.h>

#include "internal.h"

static struct kmem_cache *anon_vma_cachep;

static inline struct anon_vma *anon_vma_alloc(void)
{
	return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
}

static inline void anon_vma_free(struct anon_vma *anon_vma)
{
	kmem_cache_free(anon_vma_cachep, anon_vma);
}

/**
 * anon_vma_prepare - attach an anon_vma to a memory region
 * @vma: the memory region in question
 *
 * This makes sure the memory mapping described by 'vma' has
 * an 'anon_vma' attached to it, so that we can associate the
 * anonymous pages mapped into it with that anon_vma.
 *
 * The common case will be that we already have one, but if
 * not we either need to find an adjacent mapping that we
 * can re-use the anon_vma from (very common when the only
 * reason for splitting a vma has been mprotect()), or we
 * allocate a new one.
 *
 * Anon-vma allocations are very subtle, because we may have
 * optimistically looked up an anon_vma in page_lock_anon_vma()
 * and that may actually touch the spinlock even in the newly
 * allocated vma (it depends on RCU to make sure that the
 * anon_vma isn't actually destroyed).
 *
 * As a result, we need to do proper anon_vma locking even
 * for the new allocation. At the same time, we do not want
 * to do any locking for the common case of already having
 * an anon_vma.
 *
 * This must be called with the mmap_sem held for reading.
 */
int anon_vma_prepare(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	might_sleep();
	if (unlikely(!anon_vma)) {
		struct mm_struct *mm = vma->vm_mm;
		struct anon_vma *allocated;

		anon_vma = find_mergeable_anon_vma(vma);
		allocated = NULL;
		if (!anon_vma) {
			anon_vma = anon_vma_alloc();
			if (unlikely(!anon_vma))
				return -ENOMEM;
			allocated = anon_vma;
		}
		spin_lock(&anon_vma->lock);

		/* page_table_lock to protect against threads */
		spin_lock(&mm->page_table_lock);
		if (likely(!vma->anon_vma)) {
			vma->anon_vma = anon_vma;
			list_add_tail(&vma->anon_vma_node, &anon_vma->head);
			allocated = NULL;
		}
		spin_unlock(&mm->page_table_lock);
		spin_unlock(&anon_vma->lock);

		if (unlikely(allocated))
			anon_vma_free(allocated);
	}
	return 0;
}

void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
{
	BUG_ON(vma->anon_vma != next->anon_vma);
	list_del(&next->anon_vma_node);
}

void __anon_vma_link(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	if (anon_vma)
		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
}

void anon_vma_link(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	if (anon_vma) {
		spin_lock(&anon_vma->lock);
		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
		spin_unlock(&anon_vma->lock);
	}
}

void anon_vma_unlink(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	int empty;

	if (!anon_vma)
		return;

	spin_lock(&anon_vma->lock);
	list_del(&vma->anon_vma_node);

	/* We must garbage collect the anon_vma if it's empty */
	empty = list_empty(&anon_vma->head);
	spin_unlock(&anon_vma->lock);

	if (empty)
		anon_vma_free(anon_vma);
}

static void anon_vma_ctor(void *data)
{
	struct anon_vma *anon_vma = data;

	spin_lock_init(&anon_vma->lock);
	INIT_LIST_HEAD(&anon_vma->head);
}

void __init anon_vma_init(void)
{
	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
}

/*
 * Getting a lock on a stable anon_vma from a page off the LRU is
 * tricky: page_lock_anon_vma relies on RCU to guard against the races.
 */
static struct anon_vma *page_lock_anon_vma(struct page *page)
{
	struct anon_vma *anon_vma;
	unsigned long anon_mapping;

	rcu_read_lock();
	anon_mapping = (unsigned long) page->mapping;
	if (!(anon_mapping & PAGE_MAPPING_ANON))
		goto out;
	if (!page_mapped(page))
		goto out;

	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
	spin_lock(&anon_vma->lock);
	return anon_vma;
out:
	rcu_read_unlock();
	return NULL;
}

static void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
	spin_unlock(&anon_vma->lock);
	rcu_read_unlock();
}

/*
 * At what user virtual address is page expected in @vma?
 * Returns virtual address or -EFAULT if page's index/offset is not
 * within the range mapped by the @vma.
 */
static inline unsigned long
vma_address(struct page *page, struct vm_area_struct *vma)
{
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	unsigned long address;

	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
		/* page should be within @vma mapping range */
		return -EFAULT;
	}
	return address;
}
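
/*
 * Illustrative example (not from the original file), showing the
 * vma_address() arithmetic with made-up numbers: with 4K pages
 * (PAGE_SHIFT == 12), a page whose page->index is 0x30, mapped by a vma
 * with vm_start == 0x08048000 and vm_pgoff == 0x10, is expected at
 *
 *	0x08048000 + ((0x30 - 0x10) << 12) == 0x08068000
 *
 * which vma_address() returns as long as it falls inside
 * [vm_start, vm_end); otherwise it returns -EFAULT.
 */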

/*
 * At what user virtual address is page expected in vma? checking that the
 * page matches the vma: currently only used on anon pages, by unuse_vma;
 */
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
	if (PageAnon(page)) {
		if ((void *)vma->anon_vma !=
		    (void *)page->mapping - PAGE_MAPPING_ANON)
			return -EFAULT;
	} else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
		if (!vma->vm_file ||
		    vma->vm_file->f_mapping != page->mapping)
			return -EFAULT;
	} else
		return -EFAULT;
	return vma_address(page, vma);
}

/*
 * Check that @page is mapped at @address into @mm.
 *
 * If @sync is false, page_check_address may perform a racy check to avoid
 * the page table lock when the pte is not present (helpful when reclaiming
 * highly shared pages).
 *
 * On success returns with pte mapped and locked.
 */
pte_t *page_check_address(struct page *page, struct mm_struct *mm,
			  unsigned long address, spinlock_t **ptlp, int sync)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		return NULL;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		return NULL;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return NULL;

	pte = pte_offset_map(pmd, address);
	/* Make a quick check before getting the lock */
	if (!sync && !pte_present(*pte)) {
		pte_unmap(pte);
		return NULL;
	}

	ptl = pte_lockptr(mm, pmd);
	spin_lock(ptl);
	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
		*ptlp = ptl;
		return pte;
	}
	pte_unmap_unlock(pte, ptl);
	return NULL;
}
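
/*
 * Illustrative note (not from the original file): on success
 * page_check_address() returns with the pte still mapped and its lock
 * held, so every caller must eventually drop both, roughly:
 *
 *	pte = page_check_address(page, mm, address, &ptl, 0);
 *	if (pte) {
 *		...				(inspect or modify the pte)
 *		pte_unmap_unlock(pte, ptl);
 *	}
 *
 * page_mapped_in_vma() below is the simplest such caller; it only cares
 * whether a mapping exists, so it drops the lock immediately.
 */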

/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Returns 1 if the page is mapped into the page tables of the VMA, 0
 * if the page is not mapped into the page tables of this VMA.  Only
 * valid for normal file or anonymous VMAs.
 */
static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
	unsigned long address;
	pte_t *pte;
	spinlock_t *ptl;

	address = vma_address(page, vma);
	if (address == -EFAULT)		/* out of vma range */
		return 0;
	pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
	if (!pte)			/* the page is not in this mm */
		return 0;
	pte_unmap_unlock(pte, ptl);

	return 1;
}

/*
 * Subfunctions of page_referenced: page_referenced_one called
 * repeatedly from either page_referenced_anon or page_referenced_file.
 */
static int page_referenced_one(struct page *page,
	struct vm_area_struct *vma, unsigned int *mapcount)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address;
	pte_t *pte;
	spinlock_t *ptl;
	int referenced = 0;

	address = vma_address(page, vma);
	if (address == -EFAULT)
		goto out;

	pte = page_check_address(page, mm, address, &ptl, 0);
	if (!pte)
		goto out;

	/*
	 * Don't want to elevate referenced for mlocked page that gets this far,
	 * in order that it progresses to try_to_unmap and is moved to the
	 * unevictable list.
	 */
	if (vma->vm_flags & VM_LOCKED) {
		*mapcount = 1;	/* break early from loop */
		goto out_unmap;
	}

	if (ptep_clear_flush_young_notify(vma, address, pte)) {
		/*
		 * Don't treat a reference through a sequentially read
		 * mapping as such.  If the page has been used in
		 * another mapping, we will catch it; if this other
		 * mapping is already gone, the unmap path will have
		 * set PG_referenced or activated the page.
		 */
		if (likely(!VM_SequentialReadHint(vma)))
			referenced++;
	}

	/* Pretend the page is referenced if the task has the
	   swap token and is in the middle of a page fault. */
	if (mm != current->mm && has_swap_token(mm) &&
			rwsem_is_locked(&mm->mmap_sem))
		referenced++;

out_unmap:
	(*mapcount)--;
	pte_unmap_unlock(pte, ptl);
out:
	return referenced;
}

static int page_referenced_anon(struct page *page,
				struct mem_cgroup *mem_cont)
{
	unsigned int mapcount;
	struct anon_vma *anon_vma;
	struct vm_area_struct *vma;
	int referenced = 0;

	anon_vma = page_lock_anon_vma(page);
	if (!anon_vma)
		return referenced;

	mapcount = page_mapcount(page);
	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
		/*
		 * If we are reclaiming on behalf of a cgroup, skip
		 * counting on behalf of references from different
		 * cgroups
		 */
		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
			continue;
		referenced += page_referenced_one(page, vma, &mapcount);
		if (!mapcount)
			break;