memory.c
 * And for normal mappings this is false.
 *
 * This restricts such mappings to be a linear translation from virtual address
 * to pfn. To get around this restriction, we allow arbitrary mappings so long
 * as the vma is not a COW mapping; in that case, we know that all ptes are
 * special (because none can have been COWed).
 *
 *
 * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP.
 *
 * VM_MIXEDMAP mappings can likewise contain memory with or without "struct
 * page" backing, however the difference is that _all_ pages with a struct
 * page (that is, those where pfn_valid is true) are refcounted and considered
 * normal pages by the VM. The disadvantage is that pages are refcounted
 * (which can be slower and simply not an option for some PFNMAP users). The
 * advantage is that we don't have to follow the strict linearity rule of
 * PFNMAP mappings in order to support COWable mappings.
 *
 */
#ifdef __HAVE_ARCH_PTE_SPECIAL
# define HAVE_PTE_SPECIAL 1
#else
# define HAVE_PTE_SPECIAL 0
#endif

struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
                            pte_t pte)
{
        unsigned long pfn = pte_pfn(pte);

        if (HAVE_PTE_SPECIAL) {
                if (likely(!pte_special(pte)))
                        goto check_pfn;
                if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)))
                        print_bad_pte(vma, addr, pte, NULL);
                return NULL;
        }

        /* !HAVE_PTE_SPECIAL case follows: */

        if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
                if (vma->vm_flags & VM_MIXEDMAP) {
                        if (!pfn_valid(pfn))
                                return NULL;
                        goto out;
                } else {
                        unsigned long off;
                        off = (addr - vma->vm_start) >> PAGE_SHIFT;
                        if (pfn == vma->vm_pgoff + off)
                                return NULL;
                        if (!is_cow_mapping(vma->vm_flags))
                                return NULL;
                }
        }

check_pfn:
        if (unlikely(pfn > highest_memmap_pfn)) {
                print_bad_pte(vma, addr, pte, NULL);
                return NULL;
        }

        /*
         * NOTE! We still have PageReserved() pages in the page tables.
         * eg. VDSO mappings can cause them to exist.
         */
out:
        return pfn_to_page(pfn);
}

/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
 */

static inline void
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
             pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
             unsigned long addr, int *rss)
{
        unsigned long vm_flags = vma->vm_flags;
        pte_t pte = *src_pte;
        struct page *page;

        /* pte contains position in swap or file, so copy. */
        if (unlikely(!pte_present(pte))) {
                if (!pte_file(pte)) {
                        swp_entry_t entry = pte_to_swp_entry(pte);

                        swap_duplicate(entry);
                        /* make sure dst_mm is on swapoff's mmlist. */
                        if (unlikely(list_empty(&dst_mm->mmlist))) {
                                spin_lock(&mmlist_lock);
                                if (list_empty(&dst_mm->mmlist))
                                        list_add(&dst_mm->mmlist, &src_mm->mmlist);
                                spin_unlock(&mmlist_lock);
                        }
                        if (is_write_migration_entry(entry) &&
                                        is_cow_mapping(vm_flags)) {
                                /*
                                 * COW mappings require pages in both parent
                                 * and child to be set to read.
                                 */
                                make_migration_entry_read(&entry);
                                pte = swp_entry_to_pte(entry);
                                set_pte_at(src_mm, addr, src_pte, pte);
                        }
                }
                goto out_set_pte;
        }

        /*
         * If it's a COW mapping, write protect it both
         * in the parent and the child
         */
        if (is_cow_mapping(vm_flags)) {
                ptep_set_wrprotect(src_mm, addr, src_pte);
                pte = pte_wrprotect(pte);
        }

        /*
         * If it's a shared mapping, mark it clean in
         * the child
         */
        if (vm_flags & VM_SHARED)
                pte = pte_mkclean(pte);
        pte = pte_mkold(pte);

        page = vm_normal_page(vma, addr, pte);
        if (page) {
                get_page(page);
                page_dup_rmap(page, vma, addr);
                rss[!!PageAnon(page)]++;
        }

out_set_pte:
        set_pte_at(dst_mm, addr, dst_pte, pte);
}

static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
{
        pte_t *src_pte, *dst_pte;
        spinlock_t *src_ptl, *dst_ptl;
        int progress = 0;
        int rss[2];

again:
        rss[1] = rss[0] = 0;
        dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
        if (!dst_pte)
                return -ENOMEM;
        src_pte = pte_offset_map_nested(src_pmd, addr);
        src_ptl = pte_lockptr(src_mm, src_pmd);
        spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
        arch_enter_lazy_mmu_mode();

        do {
                /*
                 * We are holding two locks at this point - either of them
                 * could generate latencies in another task on another CPU.
                 */
                if (progress >= 32) {
                        progress = 0;
                        if (need_resched() ||
                            spin_needbreak(src_ptl) || spin_needbreak(dst_ptl))
                                break;
                }
                if (pte_none(*src_pte)) {
                        progress++;
                        continue;
                }
                copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
                progress += 8;
        } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);

        arch_leave_lazy_mmu_mode();
        spin_unlock(src_ptl);
        pte_unmap_nested(src_pte - 1);
        add_mm_rss(dst_mm, rss[0], rss[1]);
        pte_unmap_unlock(dst_pte - 1, dst_ptl);
        cond_resched();
        if (addr != end)
                goto again;
        return 0;
}

static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
{
        pmd_t *src_pmd, *dst_pmd;
        unsigned long next;

        dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
        if (!dst_pmd)
                return -ENOMEM;
        src_pmd = pmd_offset(src_pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none_or_clear_bad(src_pmd))
                        continue;
                if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
                                                vma, addr, next))
                        return -ENOMEM;
        } while (dst_pmd++, src_pmd++, addr = next, addr != end);
        return 0;
}

static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
{
        pud_t *src_pud, *dst_pud;
        unsigned long next;

        dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
        if (!dst_pud)
                return -ENOMEM;
        src_pud = pud_offset(src_pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(src_pud))
                        continue;
                if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
                                                vma, addr, next))
                        return -ENOMEM;
        } while (dst_pud++, src_pud++, addr = next, addr != end);
        return 0;
}

int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                struct vm_area_struct *vma)
{
        pgd_t *src_pgd, *dst_pgd;
        unsigned long next;
        unsigned long addr = vma->vm_start;
        unsigned long end = vma->vm_end;
        int ret;

        /*
         * Don't copy ptes where a page fault will fill them correctly.
         * Fork becomes much lighter when there are big shared or private
         * readonly mappings. The tradeoff is that copy_page_range is more
         * efficient than faulting.
         */
        if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
                if (!vma->anon_vma)
                        return 0;
        }

        if (is_vm_hugetlb_page(vma))
                return copy_hugetlb_page_range(dst_mm, src_mm, vma);

        if (unlikely(is_pfn_mapping(vma))) {
                /*
                 * We do not free on error cases below as remove_vma
                 * gets called on error from higher level routine
                 */
                ret = track_pfn_vma_copy(vma);
                if (ret)
                        return ret;
        }

        /*
         * We need to invalidate the secondary MMU mappings only when
         * there could be a permission downgrade on the ptes of the
         * parent mm. And a permission downgrade will only happen if
         * is_cow_mapping() returns true.
         */
        if (is_cow_mapping(vma->vm_flags))
                mmu_notifier_invalidate_range_start(src_mm, addr, end);

        ret = 0;
        dst_pgd = pgd_offset(dst_mm, addr);
        src_pgd = pgd_offset(src_mm, addr);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(src_pgd))
                        continue;
                if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
                                            vma, addr, next))) {
                        ret = -ENOMEM;
                        break;
                }
        } while (dst_pgd++, src_pgd++, addr = next, addr != end);

        if (is_cow_mapping(vma->vm_flags))
                mmu_notifier_invalidate_range_end(src_mm,
                                                  vma->vm_start, end);
        return ret;
}

static unsigned long zap_pte_range(struct mmu_gather *tlb,
                                struct vm_area_struct *vma, pmd_t *pmd,
                                unsigned long addr, unsigned long end,
                                long *zap_work, struct zap_details *details)
{
        struct mm_struct *mm = tlb->mm;
        pte_t *pte;
        spinlock_t *ptl;
        int file_rss = 0;
        int anon_rss = 0;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        arch_enter_lazy_mmu_mode();
        do {
                pte_t ptent = *pte;
                if (pte_none(ptent)) {
                        (*zap_work)--;
                        continue;
                }

                (*zap_work) -= PAGE_SIZE;

                if (pte_present(ptent)) {
                        struct page *page;

                        page = vm_normal_page(vma, addr, ptent);
                        if (unlikely(details) && page) {
                                /*
                                 * unmap_shared_mapping_pages() wants to
                                 * invalidate cache without truncating:
                                 * unmap shared but keep private pages.
                                 */
                                if (details->check_mapping &&
                                    details->check_mapping != page->mapping)
                                        continue;
                                /*
                                 * Each page->index must be checked when
                                 * invalidating or truncating nonlinear.
                                 */
                                if (details->nonlinear_vma &&
                                    (page->index < details->first_index ||
                                     page->index > details->last_index))
                                        continue;
                        }
                        ptent = ptep_get_and_clear_full(mm, addr, pte,
                                                        tlb->fullmm);
                        tlb_remove_tlb_entry(tlb, pte, addr);
                        if (unlikely(!page))
                                continue;
                        if (unlikely(details) && details->nonlinear_vma &&
                            linear_page_index(details->nonlinear_vma,
                                              addr) != page->index)
                                set_pte_at(mm, addr, pte,
                                           pgoff_to_pte(page->index));
                        if (PageAnon(page))
                                anon_rss--;
                        else {
                                if (pte_dirty(ptent))
                                        set_page_dirty(page);
                                if (pte_young(ptent) &&
                                    likely(!VM_SequentialReadHint(vma)))
                                        mark_page_accessed(page);
                                file_rss--;
                        }
                        page_remove_rmap(page);
                        if (unlikely(page_mapcount(page) < 0))
                                print_bad_pte(vma, addr, ptent, page);
                        tlb_remove_page(tlb, page);
                        continue;
                }
                /*
                 * If details->check_mapping, we leave swap entries;
                 * if details->nonlinear_vma, we leave file entries.
                 */
                if (unlikely(details))
                        continue;
                if (pte_file(ptent)) {
                        if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
                                print_bad_pte(vma, addr, ptent, NULL);
                } else if (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent))))
                        print_bad_pte(vma, addr, ptent, NULL);
                pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
        } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));

        add_mm_rss(mm, file_rss, anon_rss);
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);

        return addr;
}

static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
                                struct vm_area_struct *vma, pud_t *pud,
                                unsigned long addr, unsigned long end,
                                long *zap_work, struct zap_details *details)
{
        pmd_t *pmd;
        unsigned long next;

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none_or_clear_bad(pmd)) {
                        (*zap_work)--;
                        continue;
                }
                next = zap_pte_range(tlb, vma, pmd, addr, next,
                                                zap_work, details);
        } while (pmd++, addr = next, (addr != end && *zap_work > 0));

        return addr;
}

static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
                                struct vm_area_struct *vma, pgd_t *pgd,
                                unsigned long addr, unsigned long end,
                                long *zap_work, struct zap_details *details)
{
        pud_t *pud;
        unsigned long next;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud)) {
                        (*zap_work)--;
                        continue;
                }
                next = zap_pmd_range(tlb, vma, pud, addr, next,
                                                zap_work, details);
        } while (pud++, addr = next, (addr != end && *zap_work > 0));

        return addr;
}

static unsigned long unmap_page_range(struct mmu_gather *tlb,
                                struct vm_area_struct *vma,
                                unsigned long addr, unsigned long end,
                                long *zap_work, struct zap_details *details)
{
        pgd_t *pgd;
        unsigned long next;

        if (details && !details->check_mapping && !details->nonlinear_vma)
                details = NULL;

        BUG_ON(addr >= end);
        tlb_start_vma(tlb, vma);
        pgd = pgd_offset(vma->vm_mm, addr);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd)) {
                        (*zap_work)--;
                        continue;
                }
                next = zap_pud_range(tlb, vma, pgd, addr, next,
                                                zap_work, details);
        } while (pgd++, addr = next, (addr != end && *zap_work > 0));
        tlb_end_vma(tlb, vma);

        return addr;
}

#ifdef CONFIG_PREEMPT
# define ZAP_BLOCK_SIZE (8 * PAGE_SIZE)
#else
/* No preempt: go for improved straight-line efficiency */
# define ZAP_BLOCK_SIZE (1024 * PAGE_SIZE)
#endif

/**
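 *
 * A minimal sketch (not part of this file) of how copy_page_range() above is
 * reached: during fork(), dup_mmap() in kernel/fork.c walks the parent's vma
 * list and copies the page tables of each duplicated vma. The helper below is
 * hypothetical and omits dup_mmap()'s vma duplication and error handling.
 */
static int example_copy_all_page_tables(struct mm_struct *dst_mm,
                                        struct mm_struct *src_mm)
{
        struct vm_area_struct *vma;
        int ret = 0;

        /* Illustrative walk only: copy the ptes backing every vma of src_mm. */
        for (vma = src_mm->mmap; vma && !ret; vma = vma->vm_next)
                ret = copy_page_range(dst_mm, src_mm, vma);

        return ret;
}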