memory.c
/*
 * linux/mm/memory.c
 *
 * Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 */

/*
 * demand-loading started 01.12.91 - seems it is high on the list of
 * things wanted, and it should be easy to implement. - Linus
 */

/*
 * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
 * pages started 02.12.91, seems to work. - Linus.
 *
 * Tested sharing by executing about 30 /bin/sh: under the old kernel it
 * would have taken more than the 6M I have free, but it worked well as
 * far as I could see.
 *
 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
 */

/*
 * Real VM (paging to/from disk) started 18.12.91. Much more work and
 * thought has to go into this. Oh, well..
 * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why.
 * Found it. Everything seems to work now.
 * 20.12.91 - Ok, making the swap-device changeable like the root.
 */

/*
 * 05.04.94 - Multi-page memory management added for v1.1.
 * Idea by Alex Bligh (alex@cconcepts.co.uk)
 *
 * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG
 * (Gerhard.Wichert@pdb.siemens.de)
 */

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/swapctl.h>
#include <linux/iobuf.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>

#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>

unsigned long max_mapnr;
unsigned long num_physpages;
void * high_memory;
struct page *highmem_start_page;

/*
 * We special-case the C-O-W ZERO_PAGE, because it's such
 * a common occurrence (no need to read the page to know
 * that it's zero - better for the cache and memory subsystem).
 */
static inline void copy_cow_page(struct page * from, struct page * to, unsigned long address)
{
	if (from == ZERO_PAGE(address)) {
		clear_user_highpage(to, address);
		return;
	}
	copy_user_highpage(to, from, address);
}

mem_map_t * mem_map;

/*
 * Called by TLB shootdown
 */
void __free_pte(pte_t pte)
{
	struct page *page = pte_page(pte);
	if ((!VALID_PAGE(page)) || PageReserved(page))
		return;
	if (pte_dirty(pte))
		set_page_dirty(page);
	free_page_and_swap_cache(page);
}

/*
 * Note: this doesn't free the actual pages themselves. That
 * has been handled earlier when unmapping all the memory regions.
 */
static inline void free_one_pmd(pmd_t * dir)
{
	pte_t * pte;

	if (pmd_none(*dir))
		return;
	if (pmd_bad(*dir)) {
		pmd_ERROR(*dir);
		pmd_clear(dir);
		return;
	}
	pte = pte_offset(dir, 0);
	pmd_clear(dir);
	pte_free(pte);
}

static inline void free_one_pgd(pgd_t * dir)
{
	int j;
	pmd_t * pmd;

	if (pgd_none(*dir))
		return;
	if (pgd_bad(*dir)) {
		pgd_ERROR(*dir);
		pgd_clear(dir);
		return;
	}
	pmd = pmd_offset(dir, 0);
	pgd_clear(dir);
	for (j = 0; j < PTRS_PER_PMD ; j++) {
		prefetchw(pmd+j+(PREFETCH_STRIDE/16));
		free_one_pmd(pmd+j);
	}
	pmd_free(pmd);
}

/* Low and high watermarks for page table cache.
   The system should try to have pgt_water[0] <= cache elements <= pgt_water[1]
 */
int pgt_cache_water[2] = { 25, 50 };

/* Returns the number of pages freed */
int check_pgt_cache(void)
{
	return do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]);
}
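/*
 * Sketch (not from this file): the arch code behind do_check_pgt_cache()
 * keeps per-CPU quicklists of recycled page-table pages and trims them
 * back toward the low watermark once they grow past the high one.  The
 * i386 flavour in 2.4 looks roughly like the illustration below; the
 * exact quicklist helpers differ between architectures and kernel
 * revisions, so treat this as an approximation only.
 *
 *	int do_check_pgt_cache(int low, int high)
 *	{
 *		int freed = 0;
 *
 *		if (pgtable_cache_size > high) {
 *			do {
 *				if (pgd_quicklist) {
 *					free_pgd_slow(get_pgd_fast());
 *					freed++;
 *				}
 *				if (pmd_quicklist) {
 *					pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
 *					freed++;
 *				}
 *				if (pte_quicklist) {
 *					pte_free_slow(pte_alloc_one_fast(NULL, 0));
 *					freed++;
 *				}
 *			} while (pgtable_cache_size > low);
 *		}
 *		return freed;
 *	}
 */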
/*
 * This function clears all user-level page tables of a process - this
 * is needed by execve(), so that old pages aren't in the way.
 */
void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
{
	pgd_t * page_dir = mm->pgd;

	spin_lock(&mm->page_table_lock);
	page_dir += first;
	do {
		free_one_pgd(page_dir);
		page_dir++;
	} while (--nr);
	spin_unlock(&mm->page_table_lock);

	/* keep the page table cache within bounds */
	check_pgt_cache();
}

#define PTE_TABLE_MASK	((PTRS_PER_PTE-1) * sizeof(pte_t))
#define PMD_TABLE_MASK	((PTRS_PER_PMD-1) * sizeof(pmd_t))

/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
 *
 * 08Jan98 Merged into one routine from several inline routines to reduce
 *         variable count and make things faster. -jj
 *
 * dst->page_table_lock is held on entry and exit,
 * but may be dropped within pmd_alloc() and pte_alloc().
 */
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
			struct vm_area_struct *vma)
{
	pgd_t * src_pgd, * dst_pgd;
	unsigned long address = vma->vm_start;
	unsigned long end = vma->vm_end;
	unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

	src_pgd = pgd_offset(src, address)-1;
	dst_pgd = pgd_offset(dst, address)-1;

	for (;;) {
		pmd_t * src_pmd, * dst_pmd;

		src_pgd++; dst_pgd++;

		/* copy_pmd_range */

		if (pgd_none(*src_pgd))
			goto skip_copy_pmd_range;
		if (pgd_bad(*src_pgd)) {
			pgd_ERROR(*src_pgd);
			pgd_clear(src_pgd);
skip_copy_pmd_range:	address = (address + PGDIR_SIZE) & PGDIR_MASK;
			if (!address || (address >= end))
				goto out;
			continue;
		}

		src_pmd = pmd_offset(src_pgd, address);
		dst_pmd = pmd_alloc(dst, dst_pgd, address);
		if (!dst_pmd)
			goto nomem;

		do {
			pte_t * src_pte, * dst_pte;

			/* copy_pte_range */

			if (pmd_none(*src_pmd))
				goto skip_copy_pte_range;
			if (pmd_bad(*src_pmd)) {
				pmd_ERROR(*src_pmd);
				pmd_clear(src_pmd);
skip_copy_pte_range:		address = (address + PMD_SIZE) & PMD_MASK;
				if (address >= end)
					goto out;
				goto cont_copy_pmd_range;
			}

			src_pte = pte_offset(src_pmd, address);
			dst_pte = pte_alloc(dst, dst_pmd, address);
			if (!dst_pte)
				goto nomem;

			spin_lock(&src->page_table_lock);
			do {
				pte_t pte = *src_pte;
				struct page *ptepage;

				/* copy_one_pte */

				if (pte_none(pte))
					goto cont_copy_pte_range_noset;
				if (!pte_present(pte)) {
					swap_duplicate(pte_to_swp_entry(pte));
					goto cont_copy_pte_range;
				}
				ptepage = pte_page(pte);
				if ((!VALID_PAGE(ptepage)) ||
				    PageReserved(ptepage))
					goto cont_copy_pte_range;

				/* If it's a COW mapping, write protect it both in the parent and the child */
				if (cow && pte_write(pte)) {
					ptep_set_wrprotect(src_pte);
					pte = *src_pte;
				}

				/* If it's a shared mapping, mark it clean in the child */
				if (vma->vm_flags & VM_SHARED)
					pte = pte_mkclean(pte);
				pte = pte_mkold(pte);
				get_page(ptepage);
				dst->rss++;

cont_copy_pte_range:		set_pte(dst_pte, pte);
cont_copy_pte_range_noset:	address += PAGE_SIZE;
				if (address >= end)
					goto out_unlock;
				src_pte++;
				dst_pte++;
			} while ((unsigned long)src_pte & PTE_TABLE_MASK);
			spin_unlock(&src->page_table_lock);

cont_copy_pmd_range:	src_pmd++;
			dst_pmd++;
		} while ((unsigned long)src_pmd & PMD_TABLE_MASK);
	}
out_unlock:
	spin_unlock(&src->page_table_lock);
out:
	return 0;
nomem:
	return -ENOMEM;
}

/*
 * Return indicates whether a page was freed so caller can adjust rss
 */
static inline void forget_pte(pte_t page)
{
	if (!pte_none(page)) {
		printk("forget_pte: old mapping existed!\n");
		BUG();
	}
}
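/*
 * A note on the `cow' test in copy_page_range() above: a mapping is
 * copy-on-write exactly when it is private but writable, i.e. when
 * VM_MAYWRITE is set and VM_SHARED is not:
 *
 *	mmap(MAP_PRIVATE, PROT_READ|PROT_WRITE) -> VM_MAYWRITE, !VM_SHARED -> cow = 1
 *	mmap(MAP_SHARED,  PROT_READ|PROT_WRITE) -> VM_MAYWRITE,  VM_SHARED -> cow = 0
 *
 * For a cow mapping both the parent's and the child's ptes are left write
 * protected; whichever process writes first takes a write-protect fault,
 * and do_wp_page() then gives it a private copy of the page (or lets it
 * keep the page outright if nobody else still references it).
 */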
static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
{
	unsigned long offset;
	pte_t * ptep;
	int freed = 0;

	if (pmd_none(*pmd))
		return 0;
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
		return 0;
	}
	ptep = pte_offset(pmd, address);
	offset = address & ~PMD_MASK;
	if (offset + size > PMD_SIZE)
		size = PMD_SIZE - offset;
	size &= PAGE_MASK;
	for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
		pte_t pte = *ptep;
		if (pte_none(pte))
			continue;
		if (pte_present(pte)) {
			struct page *page = pte_page(pte);
			if (VALID_PAGE(page) && !PageReserved(page))
				freed ++;
			/* This will eventually call __free_pte on the pte. */
			tlb_remove_page(tlb, ptep, address + offset);
		} else {
			free_swap_and_cache(pte_to_swp_entry(pte));
			pte_clear(ptep);
		}
	}

	return freed;
}

static inline int zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
{
	pmd_t * pmd;
	unsigned long end;
	int freed;

	if (pgd_none(*dir))
		return 0;
	if (pgd_bad(*dir)) {
		pgd_ERROR(*dir);
		pgd_clear(dir);
		return 0;
	}
	pmd = pmd_offset(dir, address);
	end = address + size;
	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
	freed = 0;
	do {
		freed += zap_pte_range(tlb, pmd, address, end - address);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);

	return freed;
}

void unmap_page_range(mmu_gather_t *tlb, struct mm_struct *mm, unsigned long address, unsigned long end)
{
	int freed = 0;
	pgd_t * dir;

	if (address >= end)
		BUG();
	dir = pgd_offset(mm, address);
	do {
		freed += zap_pmd_range(tlb, dir, address, end - address);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	} while (address && (address < end));

	/*
	 * Update rss for the mm_struct (not necessarily current->mm)
	 * Notice that rss is an unsigned long.
	 */
	if (mm->rss > freed)
		mm->rss -= freed;
	else
		mm->rss = 0;
}

/*
 * remove user pages in a given range.
 */
void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
{
	unsigned long start = address, end = address + size;
	mmu_gather_t *tlb;

	/*
	 * This is a long-lived spinlock. That's fine.
	 * There's no contention, because the page table
	 * lock only protects against kswapd anyway, and
	 * even if kswapd happened to be looking at this
	 * process we _want_ it to get stuck.
	 */
	if (address >= end)
		BUG();
	spin_lock(&mm->page_table_lock);
	flush_cache_range(mm, address, end);
	tlb = tlb_gather_mmu(mm);
	unmap_page_range(tlb, mm, address, end);
	tlb_finish_mmu(tlb, start, end);
	spin_unlock(&mm->page_table_lock);
}

/*
 * Do a quick page-table lookup for a single page.
 */
static struct page * follow_page(struct mm_struct *mm, unsigned long address, int write)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *ptep, pte;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		goto out;

	pmd = pmd_offset(pgd, address);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		goto out;

	ptep = pte_offset(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	if (pte_present(pte)) {
		if (!write ||
		    (pte_write(pte) && pte_dirty(pte)))
			return pte_page(pte);
	}

out:
	return 0;
}

/*
 * Given a physical address, is there a useful struct page pointing to
 * it?  This may become more complex in the future if we start dealing
 * with IO-aperture pages in kiobufs.
 */
static inline struct page * get_page_map(struct page *page)
{
	if (!VALID_PAGE(page))
		return 0;
	return page;
}
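/*
 * The fault loop in get_user_pages() below relies on the 2.4 return
 * convention of handle_mm_fault(): 1 means the fault was serviced without
 * I/O (a minor fault), 2 means it needed I/O (a major fault), and 0 means
 * the access itself was bogus.  follow_page() is retried until the page is
 * actually present (and, for a write, writable and already dirty), so each
 * iteration of that loop either succeeds or faults the page in first.
 */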
/*
 * Please read Documentation/cachetlb.txt before using this function,
 * accessing foreign memory spaces can cause cache coherency problems.
 *
 * Accessing a VM_IO area is even more dangerous, therefore the function
 * fails if pages is != NULL and a VM_IO area is found.
 */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int flags;

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct * vma;

		vma = find_extend_vma(mm, start);

		if ( !vma ||
		    (pages && vma->vm_flags & VM_IO) || !(flags & vma->vm_flags) )
			return i ? : -EFAULT;

		spin_lock(&mm->page_table_lock);
		do {
			struct page *map;
			while (!(map = follow_page(mm, start, write))) {
				spin_unlock(&mm->page_table_lock);
				switch (handle_mm_fault(mm, vma, start, write)) {
				case 1:
					tsk->min_flt++;
					break;
				case 2:
					tsk->maj_flt++;
					break;
				case 0:
					if (i) return i;
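The listing captured above breaks off inside get_user_pages()'s fault loop. For orientation, a typical 2.4-era caller pins the pages backing a user buffer roughly as in the sketch below; the function and variable names (pin_user_buffer, uaddr, nr, pages) are illustrative, not taken from this file, and the caller is expected to hold mm->mmap_sem for reading around the call and to drop each pinned page with page_cache_release() once the I/O is done.

/* Illustrative sketch only - not part of memory.c. */
static int pin_user_buffer(unsigned long uaddr, int nr, int rw,
			   struct page **pages)
{
	struct mm_struct *mm = current->mm;
	int err;

	down_read(&mm->mmap_sem);
	/* write = 1 when the kernel will write into the user buffer (a device read) */
	err = get_user_pages(current, mm, uaddr & PAGE_MASK, nr,
			     rw == READ, 0 /* force */, pages, NULL);
	up_read(&mm->mmap_sem);

	return err;	/* number of pages pinned, or -EFAULT / -ENOMEM */
}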