
mmap.c

Source code of the Linux memory-management module (mm/mmap.c)
Language: C
Page 1 of 5
/*
 * mm/mmap.c
 *
 * Written by obz.
 *
 * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 */

#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>

#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifndef arch_rebalance_pgtables
#define arch_rebalance_pgtables(addr, len)		(addr)
#endif

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);

/*
 * WARNING: the debugging will use recursive algorithms so never enable this
 * unless you know what you are doing.
 */
#undef DEBUG_MM_RB

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type	prot
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));
}
EXPORT_SYMBOL(vm_get_page_prot);
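The protection_map[] table and vm_get_page_prot() above are the kernel side of the prot/map_type matrix described in the comment. As a reminder of the userspace side that feeds into that table, here is a minimal POSIX sketch (a standard mmap() call, not part of mmap.c); the size and flag choices are assumptions for illustration only:

/* Illustrative userspace sketch, not part of mm/mmap.c: the prot and flags
 * passed to mmap() are what eventually select an entry in protection_map. */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	/* MAP_PRIVATE + PROT_READ|PROT_WRITE: writes go to a private copy
	 * (the "copy" column in the MAP_PRIVATE row of the table above). */
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	strcpy(p, "hello");
	printf("%s\n", p);
	munmap(p, 4096);
	return 0;
}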
int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
int sysctl_overcommit_ratio = 50;	/* default is 50% */
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);

/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
 *
 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
 * Additional code 2002 Jul 20 by Robert Love.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	unsigned long free, allowed;

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		unsigned long n;

		free = global_page_state(NR_FILE_PAGES);
		free += nr_swap_pages;

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure.  The dentry
		 * cache and most inode caches should fall into this
		 */
		free += global_page_state(NR_SLAB_RECLAIMABLE);

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			free -= free / 32;

		if (free > pages)
			return 0;

		/*
		 * nr_free_pages() is very expensive on large systems,
		 * only call if we're about to fail.
		 */
		n = nr_free_pages();

		/*
		 * Leave reserved pages. The pages are not for anonymous pages.
		 */
		if (n <= totalreserve_pages)
			goto error;
		else
			n -= totalreserve_pages;

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			n -= n / 32;
		free += n;

		if (free > pages)
			return 0;

		goto error;
	}

	allowed = (totalram_pages - hugetlb_total_pages())
		* sysctl_overcommit_ratio / 100;
	/*
	 * Leave the last 3% for root
	 */
	if (!cap_sys_admin)
		allowed -= allowed / 32;
	allowed += total_swap_pages;

	/* Don't let a single process grow too big:
	   leave 3% of the size of this process for other processes */
	if (mm)
		allowed -= mm->total_vm / 32;

	/*
	 * cast `allowed' as a signed long because vm_committed_space
	 * sometimes has a negative value
	 */
	if (atomic_long_read(&vm_committed_space) < (long)allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}
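In the strict OVERCOMMIT_NEVER case (the fall-through path above), the commit limit works out to roughly (RAM - hugetlb pages) * sysctl_overcommit_ratio / 100 plus swap, less a ~3% root reserve and a per-process allowance. The standalone sketch below walks through that arithmetic; every page count in it is an invented example, not a value read from a real system:

/* Standalone sketch of the OVERCOMMIT_NEVER limit computed above (non-root
 * case); the page counts below are invented for illustration only. */
#include <stdio.h>

int main(void)
{
	unsigned long totalram_pages   = 2097152;	/* assume 8 GiB of 4 KiB pages */
	unsigned long hugetlb_pages    = 0;		/* assume no hugetlb reservation */
	unsigned long total_swap_pages = 524288;	/* assume 2 GiB of swap */
	unsigned long process_total_vm = 65536;		/* assume a 256 MiB process */
	int overcommit_ratio = 50;			/* default sysctl value */

	unsigned long allowed =
		(totalram_pages - hugetlb_pages) * overcommit_ratio / 100;
	allowed -= allowed / 32;		/* leave the last ~3% for root */
	allowed += total_swap_pages;
	allowed -= process_total_vm / 32;	/* leave room for other processes */

	printf("commit limit: %lu pages (~%lu MiB)\n",
	       allowed, allowed * 4096 / (1024 * 1024));
	return 0;
}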
/*
 * Requires inode->i_mapping->i_mmap_lock
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
	if (vma->vm_flags & VM_SHARED)
		mapping->i_mmap_writable--;

	flush_dcache_mmap_lock(mapping);
	if (unlikely(vma->vm_flags & VM_NONLINEAR))
		list_del_init(&vma->shared.vm_set.list);
	else
		vma_prio_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}

/*
 * Unlink a file-based vm structure from its prio_tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		spin_lock(&mapping->i_mmap_lock);
		__remove_shared_vm_struct(vma, file, mapping);
		spin_unlock(&mapping->i_mmap_lock);
	}
}

/*
 * Close a vm structure and free it, returning the next.
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file) {
		fput(vma->vm_file);
		if (vma->vm_flags & VM_EXECUTABLE)
			removed_exe_file_vma(vma->vm_mm);
	}
	mpol_put(vma_policy(vma));
	kmem_cache_free(vm_area_cachep, vma);
	return next;
}

SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long rlim, retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	unsigned long min_brk;

	down_write(&mm->mmap_sem);

#ifdef CONFIG_COMPAT_BRK
	min_brk = mm->end_code;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against rlimit here. If this check is done later after the test
	 * of oldbrk with newbrk then it can escape the test and let the data
	 * segment grow beyond its set limit in the case where the limit is
	 * not page aligned -Ram Gupta
	 */
	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
			(mm->end_data - mm->start_data) > rlim)
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/* Check against existing mmap mappings. */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;
set_brk:
	mm->brk = brk;
out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}
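sys_brk() above is where a libc brk()/sbrk() call ends up: the requested break is page-aligned, checked against RLIMIT_DATA and existing mappings, and the heap is then grown with do_brk() or shrunk with do_munmap(). A minimal userspace sketch that exercises this path through glibc's sbrk() (not part of mmap.c):

/* Userspace sketch: grow and shrink the program break via sbrk(),
 * which lands in the SYSCALL_DEFINE1(brk, ...) handler above. */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	void *start = sbrk(0);			/* current program break */

	if (sbrk(4096) == (void *)-1) {		/* ask the kernel to grow the heap */
		perror("sbrk");
		return 1;
	}
	printf("break moved from %p to %p\n", start, sbrk(0));

	sbrk(-4096);				/* shrinking brk is always allowed */
	return 0;
}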
#ifdef DEBUG_MM_RB
static int browse_rb(struct rb_root *root)
{
	int i = 0, j;
	struct rb_node *nd, *pn = NULL;
	unsigned long prev = 0, pend = 0;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		if (vma->vm_start < prev)
			printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
		if (vma->vm_start < pend)
			printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
		if (vma->vm_start > vma->vm_end)
			printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
		i++;
		pn = nd;
		prev = vma->vm_start;
		pend = vma->vm_end;
	}
	j = 0;
	for (nd = pn; nd; nd = rb_prev(nd)) {
		j++;
	}
	if (i != j)
		printk("backwards %d, forwards %d\n", j, i), i = 0;
	return i;
}

void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int i = 0;
	struct vm_area_struct *tmp = mm->mmap;
	while (tmp) {
		tmp = tmp->vm_next;
		i++;
	}
	if (i != mm->map_count)
		printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
	i = browse_rb(&mm->mm_rb);
	if (i != mm->map_count)
		printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
	BUG_ON(bug);
}
#else
#define validate_mm(mm) do { } while (0)
#endif

static struct vm_area_struct *
find_vma_prepare(struct mm_struct *mm, unsigned long addr,
		struct vm_area_struct **pprev, struct rb_node ***rb_link,
		struct rb_node ** rb_parent)
{
	struct vm_area_struct * vma;
	struct rb_node ** __rb_link, * __rb_parent, * rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;
	vma = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			vma = vma_tmp;
			if (vma_tmp->vm_start <= addr)
				break;
			__rb_link = &__rb_parent->rb_left;
		} else {
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return vma;
}

static inline void
__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct rb_node *rb_parent)
{
	if (prev) {
		vma->vm_next = prev->vm_next;
		prev->vm_next = vma;
	} else {
		mm->mmap = vma;
		if (rb_parent)
			vma->vm_next = rb_entry(rb_parent,
					struct vm_area_struct, vm_rb);
		else
			vma->vm_next = NULL;
	}
}

void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
}

static void __vma_link_file(struct vm_area_struct *vma)
{
	struct file *file;

	file = vma->vm_file;
	if (file) {
		struct address_space *mapping = file->f_mapping;

		if (vma->vm_flags & VM_DENYWRITE)
			atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
		if (vma->vm_flags & VM_SHARED)
			mapping->i_mmap_writable++;

		flush_dcache_mmap_lock(mapping);
		if (unlikely(vma->vm_flags & VM_NONLINEAR))
			vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
		else
			vma_prio_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}
}

static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
	__anon_vma_link(vma);
}

static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
			struct vm_area_struct *prev, struct rb_node **rb_link,
			struct rb_node *rb_parent)
{
	struct address_space *mapping = NULL;

	if (vma->vm_file)
		mapping = vma->vm_file->f_mapping;

	if (mapping) {
		spin_lock(&mapping->i_mmap_lock);
		vma->vm_truncate_count = mapping->truncate_count;
	}
	anon_vma_lock(vma);

	__vma_link(mm, vma, prev, rb_link, rb_parent);
	__vma_link_file(vma);

	anon_vma_unlock(vma);
	if (mapping)
		spin_unlock(&mapping->i_mmap_lock);

	mm->map_count++;
	validate_mm(mm);
}

/*
 * Helper for vma_adjust in the split_vma insert case:
 * insert vm structure into list and rbtree and anon_vma,
 * but it has already been inserted into prio_tree earlier.
 */
static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *__vma, *prev;
	struct rb_node **rb_link, *rb_parent;

	__vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
	BUG_ON(__vma && __vma->vm_start < vma->vm_end);
	__vma_link(mm, vma, prev, rb_link, rb_parent);
	mm->map_count++;
}

static inline void
__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev)
{
	prev->vm_next = vma->vm_next;
	rb_erase(&vma->vm_rb, &mm->mm_rb);
	if (mm->mmap_cache == vma)
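The listing continues on the following pages. Earlier in this page, find_vma_prepare() walks the VMA rbtree for the first area whose vm_end lies above addr, recording the rbtree link and parent needed for a later insertion. The same "first interval ending after addr" lookup is sketched below over a plain sorted array; struct region and find_region() are hypothetical names for illustration, not kernel code:

/* Sketch of the lookup find_vma_prepare() performs, done over a sorted array
 * instead of an rbtree; struct region is a hypothetical stand-in for a VMA. */
#include <stdio.h>

struct region {
	unsigned long start;	/* like vma->vm_start */
	unsigned long end;	/* like vma->vm_end (exclusive) */
};

/* Return the first region with end > addr, or NULL. As in the kernel, the
 * result either contains addr or is the nearest region above it. */
static struct region *find_region(struct region *r, int n, unsigned long addr)
{
	for (int i = 0; i < n; i++)
		if (r[i].end > addr)
			return &r[i];
	return NULL;
}

int main(void)
{
	struct region map[] = { { 0x1000, 0x3000 }, { 0x5000, 0x8000 } };
	struct region *hit = find_region(map, 2, 0x4000);

	if (hit)
		printf("first region ending above 0x4000: [%lx, %lx)\n",
		       hit->start, hit->end);
	return 0;
}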
