📄 mmap.c
字号:
{ struct mm_struct * mm = current->mm; struct vm_area_struct * vma, * prev; unsigned long flags; struct rb_node ** rb_link, * rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; int error; len = PAGE_ALIGN(len); if (!len) return addr; if ((addr + len) > TASK_SIZE || (addr + len) < addr) return -EINVAL; if (is_hugepage_only_range(mm, addr, len)) return -EINVAL; error = security_file_mmap(NULL, 0, 0, 0, addr, 1); if (error) return error; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; error = arch_mmap_check(addr, len, flags); if (error) return error; /* * mlock MCL_FUTURE? */ if (mm->def_flags & VM_LOCKED) { unsigned long locked, lock_limit; locked = len >> PAGE_SHIFT; locked += mm->locked_vm; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; } /* * mm->mmap_sem is required to protect against another thread * changing the mappings in case we sleep. */ verify_mm_writelocked(mm); /* * Clear old maps. this also does some error checking for us */ munmap_back: vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); if (vma && vma->vm_start < addr + len) { if (do_munmap(mm, addr, len)) return -ENOMEM; goto munmap_back; } /* Check against address space limits *after* clearing old maps... */ if (!may_expand_vm(mm, len >> PAGE_SHIFT)) return -ENOMEM; if (mm->map_count > sysctl_max_map_count) return -ENOMEM; if (security_vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; /* Can we just expand an old private anonymous mapping? */ vma = vma_merge(mm, prev, addr, addr + len, flags, NULL, NULL, pgoff, NULL); if (vma) goto out; /* * create a vma struct for an anonymous mapping */ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) { vm_unacct_memory(len >> PAGE_SHIFT); return -ENOMEM; } vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_pgoff = pgoff; vma->vm_flags = flags; vma->vm_page_prot = vm_get_page_prot(flags); vma_link(mm, vma, prev, rb_link, rb_parent);out: mm->total_vm += len >> PAGE_SHIFT; if (flags & VM_LOCKED) { if (!mlock_vma_pages_range(vma, addr, addr + len)) mm->locked_vm += (len >> PAGE_SHIFT); } return addr;}EXPORT_SYMBOL(do_brk);/* Release all mmaps. */void exit_mmap(struct mm_struct *mm){ struct mmu_gather *tlb; struct vm_area_struct *vma; unsigned long nr_accounted = 0; unsigned long end; /* mm's last user has gone, and its about to be pulled down */ mmu_notifier_release(mm); if (mm->locked_vm) { vma = mm->mmap; while (vma) { if (vma->vm_flags & VM_LOCKED) munlock_vma_pages_all(vma); vma = vma->vm_next; } } arch_exit_mmap(mm); vma = mm->mmap; if (!vma) /* Can happen if dup_mmap() received an OOM */ return; lru_add_drain(); flush_cache_mm(mm); tlb = tlb_gather_mmu(mm, 1); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0); tlb_finish_mmu(tlb, 0, end); /* * Walk the list again, actually closing and freeing it, * with preemption enabled, without holding any MM locks. */ while (vma) vma = remove_vma(vma); BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);}/* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL * then i_mmap_lock is taken here. */int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma){ struct vm_area_struct * __vma, * prev; struct rb_node ** rb_link, * rb_parent; /* * The vm_pgoff of a purely anonymous vma should be irrelevant * until its first write fault, when page's anon_vma and index * are set. But now set the vm_pgoff it will almost certainly * end up with (unless mremap moves it elsewhere before that * first wfault), so /proc/pid/maps tells a consistent story. * * By setting it to reflect the virtual start address of the * vma, merges and splits can happen in a seamless way, just * using the existing file pgoff checks and manipulations. * Similarly in do_mmap_pgoff and in do_brk. */ if (!vma->vm_file) { BUG_ON(vma->anon_vma); vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; } __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent); if (__vma && __vma->vm_start < vma->vm_end) return -ENOMEM; if ((vma->vm_flags & VM_ACCOUNT) && security_vm_enough_memory_mm(mm, vma_pages(vma))) return -ENOMEM; vma_link(mm, vma, prev, rb_link, rb_parent); return 0;}/* * Copy the vma structure to a new location in the same mm, * prior to moving page table entries, to effect an mremap move. */struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, unsigned long addr, unsigned long len, pgoff_t pgoff){ struct vm_area_struct *vma = *vmap; unsigned long vma_start = vma->vm_start; struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma, *prev; struct rb_node **rb_link, *rb_parent; struct mempolicy *pol; /* * If anonymous vma has not yet been faulted, update new pgoff * to match new location, to increase its chance of merging. */ if (!vma->vm_file && !vma->anon_vma) pgoff = addr >> PAGE_SHIFT; find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma)); if (new_vma) { /* * Source vma may have been merged into new_vma */ if (vma_start >= new_vma->vm_start && vma_start < new_vma->vm_end) *vmap = new_vma; } else { new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (new_vma) { *new_vma = *vma; pol = mpol_dup(vma_policy(vma)); if (IS_ERR(pol)) { kmem_cache_free(vm_area_cachep, new_vma); return NULL; } vma_set_policy(new_vma, pol); new_vma->vm_start = addr; new_vma->vm_end = addr + len; new_vma->vm_pgoff = pgoff; if (new_vma->vm_file) { get_file(new_vma->vm_file); if (vma->vm_flags & VM_EXECUTABLE) added_exe_file_vma(mm); } if (new_vma->vm_ops && new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); vma_link(mm, new_vma, prev, rb_link, rb_parent); } } return new_vma;}/* * Return true if the calling process may expand its vm space by the passed * number of pages */int may_expand_vm(struct mm_struct *mm, unsigned long npages){ unsigned long cur = mm->total_vm; /* pages */ unsigned long lim; lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; if (cur + npages > lim) return 0; return 1;}static int special_mapping_fault(struct vm_area_struct *vma, struct vm_fault *vmf){ pgoff_t pgoff; struct page **pages; /* * special mappings have no vm_file, and in that case, the mm * uses vm_pgoff internally. So we have to subtract it from here. * We are allowed to do this because we are the mm; do not copy * this code into drivers! */ pgoff = vmf->pgoff - vma->vm_pgoff; for (pages = vma->vm_private_data; pgoff && *pages; ++pages) pgoff--; if (*pages) { struct page *page = *pages; get_page(page); vmf->page = page; return 0; } return VM_FAULT_SIGBUS;}/* * Having a close hook prevents vma merging regardless of flags. */static void special_mapping_close(struct vm_area_struct *vma){}static struct vm_operations_struct special_mapping_vmops = { .close = special_mapping_close, .fault = special_mapping_fault,};/* * Called with mm->mmap_sem held for writing. * Insert a new vma covering the given region, with the given flags. * Its pages are supplied by the given array of struct page *. * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated. * The region past the last page supplied will always produce SIGBUS. * The array pointer and the pages it points to are assumed to stay alive * for as long as this mapping might exist. */int install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long vm_flags, struct page **pages){ struct vm_area_struct *vma; vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (unlikely(vma == NULL)) return -ENOMEM; vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = &special_mapping_vmops; vma->vm_private_data = pages; if (unlikely(insert_vm_struct(mm, vma))) { kmem_cache_free(vm_area_cachep, vma); return -ENOMEM; } mm->total_vm += len >> PAGE_SHIFT; return 0;}static DEFINE_MUTEX(mm_all_locks_mutex);static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma){ if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) { /* * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. */ spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem); /* * We can safely modify head.next after taking the * anon_vma->lock. If some other vma in this mm shares * the same anon_vma we won't take it again. * * No need of atomic instructions here, head.next * can't change from under us thanks to the * anon_vma->lock. */ if (__test_and_set_bit(0, (unsigned long *) &anon_vma->head.next)) BUG(); }}static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping){ if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { /* * AS_MM_ALL_LOCKS can't change from under us because * we hold the mm_all_locks_mutex. * * Operations on ->flags have to be atomic because * even if AS_MM_ALL_LOCKS is stable thanks to the * mm_all_locks_mutex, there may be other cpus * changing other bitflags in parallel to us. */ if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) BUG(); spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem); }}/* * This operation locks against the VM for all pte/vma/mm related * operations that could ever happen on a certain mm. This includes * vmtruncate, try_to_unmap, and all page faults. * * The caller must take the mmap_sem in write mode before calling * mm_take_all_locks(). The caller isn't allowed to release the * mmap_sem until mm_drop_all_locks() returns. * * mmap_sem in write mode is required in order to block all operations * that could modify pagetables and free pages without need of * altering the vma layout (for example populate_range() with * nonlinear vmas). It's also needed in write mode to avoid new * anon_vmas to be associated with existing vmas. * * A single task can't take more than one mm_take_all_locks() in a row * or it would deadlock. * * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in * mapping->flags avoid to take the same lock twice, if more than one * vma in this mm is backed by the same anon_vma or address_space. * * We can take all the locks in random order because the VM code * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never * takes more than one of them in a row. Secondly we're protected * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. * * mm_take_all_locks() and mm_drop_all_locks are expensive operations * that may have to take thousand of locks. * * mm_take_all_locks() can fail if it's interrupted by signals. */int mm_take_all_locks(struct mm_struct *mm){ struct vm_area_struct *vma; int ret = -EINTR; BUG_ON(down_read_trylock(&mm->mmap_sem)); mutex_lock(&mm_all_locks_mutex); for (vma = mm->mmap; vma; vma = vma->vm_next) { if (signal_pending(current)) goto out_unlock; if (vma->vm_file && vma->vm_file->f_mapping) vm_lock_mapping(mm, vma->vm_file->f_mapping); } for (vma = mm->mmap; vma; vma = vma->vm_next) { if (signal_pending(current)) goto out_unlock; if (vma->anon_vma) vm_lock_anon_vma(mm, vma->anon_vma); } ret = 0;out_unlock: if (ret) mm_drop_all_locks(mm); return ret;}static void vm_unlock_anon_vma(struct anon_vma *anon_vma){ if (test_bit(0, (unsigned long *) &anon_vma->head.next)) { /* * The LSB of head.next can't change to 0 from under * us because we hold the mm_all_locks_mutex. * * We must however clear the bitflag before unlocking * the vma so the users using the anon_vma->head will * never see our bitflag. * * No need of atomic instructions here, head.next * can't change from under us until we release the * anon_vma->lock. */ if (!__test_and_clear_bit(0, (unsigned long *) &anon_vma->head.next)) BUG(); spin_unlock(&anon_vma->lock); }}static void vm_unlock_mapping(struct address_space *mapping){ if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { /* * AS_MM_ALL_LOCKS can't change to 0 from under us * because we hold the mm_all_locks_mutex. */ spin_unlock(&mapping->i_mmap_lock); if (!test_and_clear_bit(AS_MM_ALL_LOCKS, &mapping->flags)) BUG(); }}/* * The mmap_sem cannot be released by the caller until * mm_drop_all_locks() returns. */void mm_drop_all_locks(struct mm_struct *mm){ struct vm_area_struct *vma; BUG_ON(down_read_trylock(&mm->mmap_sem)); BUG_ON(!mutex_is_locked(&mm_all_locks_mutex)); for (vma = mm->mmap; vma; vma = vma->vm_next) { if (vma->anon_vma) vm_unlock_anon_
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -