mempolicy.c
	err = security_task_movememory(task);
	if (err)
		goto out;

	err = do_migrate_pages(mm, &old, &new,
		capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
out:
	mmput(mm);
	return err;
}

/* Retrieve NUMA policy */
SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
		unsigned long __user *, nmask, unsigned long, maxnode,
		unsigned long, addr, unsigned long, flags)
{
	int err;
	int uninitialized_var(pval);
	nodemask_t nodes;

	if (nmask != NULL && maxnode < MAX_NUMNODES)
		return -EINVAL;

	err = do_get_mempolicy(&pval, &nodes, addr, flags);
	if (err)
		return err;

	if (policy && put_user(pval, policy))
		return -EFAULT;

	if (nmask)
		err = copy_nodes_to_user(nmask, maxnode, &nodes);

	return err;
}

#ifdef CONFIG_COMPAT

asmlinkage long compat_sys_get_mempolicy(int __user *policy,
				     compat_ulong_t __user *nmask,
				     compat_ulong_t maxnode,
				     compat_ulong_t addr, compat_ulong_t flags)
{
	long err;
	unsigned long __user *nm = NULL;
	unsigned long nr_bits, alloc_size;
	DECLARE_BITMAP(bm, MAX_NUMNODES);

	nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;

	if (nmask)
		nm = compat_alloc_user_space(alloc_size);

	err = sys_get_mempolicy(policy, nm, nr_bits+1, addr, flags);

	if (!err && nmask) {
		err = copy_from_user(bm, nm, alloc_size);
		/* ensure entire bitmap is zeroed */
		err |= clear_user(nmask, ALIGN(maxnode-1, 8) / 8);
		err |= compat_put_bitmap(nmask, bm, nr_bits);
	}

	return err;
}

asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
				     compat_ulong_t maxnode)
{
	long err = 0;
	unsigned long __user *nm = NULL;
	unsigned long nr_bits, alloc_size;
	DECLARE_BITMAP(bm, MAX_NUMNODES);

	nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;

	if (nmask) {
		err = compat_get_bitmap(bm, nmask, nr_bits);
		nm = compat_alloc_user_space(alloc_size);
		err |= copy_to_user(nm, bm, alloc_size);
	}

	if (err)
		return -EFAULT;

	return sys_set_mempolicy(mode, nm, nr_bits+1);
}

asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
			     compat_ulong_t mode, compat_ulong_t __user *nmask,
			     compat_ulong_t maxnode, compat_ulong_t flags)
{
	long err = 0;
	unsigned long __user *nm = NULL;
	unsigned long nr_bits, alloc_size;
	nodemask_t bm;

	nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;

	if (nmask) {
		err = compat_get_bitmap(nodes_addr(bm), nmask, nr_bits);
		nm = compat_alloc_user_space(alloc_size);
		err |= copy_to_user(nm, nodes_addr(bm), alloc_size);
	}

	if (err)
		return -EFAULT;

	return sys_mbind(start, len, mode, nm, nr_bits+1, flags);
}

#endif
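/*
 * A minimal user-space sketch, not part of this file, showing how the
 * set_mempolicy()/get_mempolicy() system calls defined above are typically
 * driven.  It assumes the libnuma <numaif.h> wrappers (build with -lnuma);
 * the node numbers are made up for the example.  It requests interleaving
 * over nodes 0 and 1, then reads the task policy back.  Note the
 * "maxnode < MAX_NUMNODES" check in sys_get_mempolicy(): the output bitmap
 * must be at least MAX_NUMNODES bits, so a generously sized buffer is used.
 *
 *	#include <numaif.h>	// set_mempolicy(), get_mempolicy(), MPOL_*
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		unsigned long request = 0x3;		// nodes 0 and 1
 *		unsigned long mask[64] = { 0 };		// 4096 node bits
 *		int mode = -1;
 *
 *		// maxnode counts one past the highest bit of interest
 *		if (set_mempolicy(MPOL_INTERLEAVE, &request,
 *				  sizeof(request) * 8 + 1))
 *			perror("set_mempolicy");
 *
 *		// no address and flags == 0: query the task policy
 *		if (get_mempolicy(&mode, mask, sizeof(mask) * 8, NULL, 0))
 *			perror("get_mempolicy");
 *
 *		printf("mode=%d, first nodemask word=0x%lx\n", mode, mask[0]);
 *		return 0;
 *	}
 */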
/*
 * get_vma_policy(@task, @vma, @addr)
 * @task - task for fallback if vma policy == default
 * @vma - virtual memory area whose policy is sought
 * @addr - address in @vma for shared policy lookup
 *
 * Returns effective policy for a VMA at specified address.
 * Falls back to @task or system default policy, as necessary.
 * Current or other task's task mempolicy and non-shared vma policies
 * are protected by the task's mmap_sem, which must be held for read by
 * the caller.
 * Shared policies [those marked as MPOL_F_SHARED] require an extra reference
 * count--added by the get_policy() vm_op, as appropriate--to protect against
 * freeing by another task.  It is the caller's responsibility to free the
 * extra reference for shared policies.
 */
static struct mempolicy *get_vma_policy(struct task_struct *task,
		struct vm_area_struct *vma, unsigned long addr)
{
	struct mempolicy *pol = task->mempolicy;

	if (vma) {
		if (vma->vm_ops && vma->vm_ops->get_policy) {
			struct mempolicy *vpol = vma->vm_ops->get_policy(vma,
									addr);
			if (vpol)
				pol = vpol;
		} else if (vma->vm_policy)
			pol = vma->vm_policy;
	}
	if (!pol)
		pol = &default_policy;
	return pol;
}

/*
 * Return a nodemask representing a mempolicy for filtering nodes for
 * page allocation
 */
static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
{
	/* Lower zones don't get a nodemask applied for MPOL_BIND */
	if (unlikely(policy->mode == MPOL_BIND) &&
			gfp_zone(gfp) >= policy_zone &&
			cpuset_nodemask_valid_mems_allowed(&policy->v.nodes))
		return &policy->v.nodes;

	return NULL;
}

/* Return a zonelist indicated by gfp for node representing a mempolicy */
static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
{
	int nd = numa_node_id();

	switch (policy->mode) {
	case MPOL_PREFERRED:
		if (!(policy->flags & MPOL_F_LOCAL))
			nd = policy->v.preferred_node;
		break;
	case MPOL_BIND:
		/*
		 * Normally, MPOL_BIND allocations are node-local within the
		 * allowed nodemask.  However, if __GFP_THISNODE is set and the
		 * current node is not part of the mask, we use the zonelist
		 * for the first node in the mask instead.
		 */
		if (unlikely(gfp & __GFP_THISNODE) &&
				unlikely(!node_isset(nd, policy->v.nodes)))
			nd = first_node(policy->v.nodes);
		break;
	case MPOL_INTERLEAVE:	/* should not happen */
		break;
	default:
		BUG();
	}
	return node_zonelist(nd, gfp);
}

/* Do dynamic interleaving for a process */
static unsigned interleave_nodes(struct mempolicy *policy)
{
	unsigned nid, next;
	struct task_struct *me = current;

	nid = me->il_next;
	next = next_node(nid, policy->v.nodes);
	if (next >= MAX_NUMNODES)
		next = first_node(policy->v.nodes);
	if (next < MAX_NUMNODES)
		me->il_next = next;
	return nid;
}

/*
 * Depending on the memory policy provide a node from which to allocate the
 * next slab entry.
 * @policy must be protected from freeing by the caller.  If @policy is
 * the current task's mempolicy, this protection is implicit, as only the
 * task can change its policy.  The system default policy requires no
 * such protection.
 */
unsigned slab_node(struct mempolicy *policy)
{
	if (!policy || policy->flags & MPOL_F_LOCAL)
		return numa_node_id();

	switch (policy->mode) {
	case MPOL_PREFERRED:
		/*
		 * handled MPOL_F_LOCAL above
		 */
		return policy->v.preferred_node;

	case MPOL_INTERLEAVE:
		return interleave_nodes(policy);

	case MPOL_BIND: {
		/*
		 * Follow bind policy behavior and start allocation at the
		 * first node.
		 */
		struct zonelist *zonelist;
		struct zone *zone;
		enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL);
		zonelist = &NODE_DATA(numa_node_id())->node_zonelists[0];
		(void)first_zones_zonelist(zonelist, highest_zoneidx,
							&policy->v.nodes,
							&zone);
		return zone->node;
	}

	default:
		BUG();
	}
}
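/*
 * A self-contained sketch, not kernel code, of the round-robin walk that
 * interleave_nodes() performs above.  The allowed node set is modeled as a
 * plain 64-bit mask and il_next as a local cursor; next_node()/first_node()
 * are approximated with a simple bit scan.  Node numbers are made up.
 */
#include <stdio.h>

/* find the next set bit strictly after 'nid', or 64 if none remains */
static int model_next_node(int nid, unsigned long long mask)
{
	for (int n = nid + 1; n < 64; n++)
		if (mask & (1ULL << n))
			return n;
	return 64;
}

static int model_first_node(unsigned long long mask)
{
	return model_next_node(-1, mask);
}

int main(void)
{
	unsigned long long nodes = 0x0b;	/* allowed nodes: 0, 1, 3 */
	int il_next = model_first_node(nodes);

	/* eight successive "allocations" cycle 0, 1, 3, 0, 1, 3, ... */
	for (int i = 0; i < 8; i++) {
		int nid = il_next;
		int next = model_next_node(nid, nodes);

		if (next >= 64)		/* wrap, as interleave_nodes() does */
			next = model_first_node(nodes);
		il_next = next;
		printf("allocation %d -> node %d\n", i, nid);
	}
	return 0;
}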
/* Do static interleaving for a VMA with known offset. */
static unsigned offset_il_node(struct mempolicy *pol,
		struct vm_area_struct *vma, unsigned long off)
{
	unsigned nnodes = nodes_weight(pol->v.nodes);
	unsigned target;
	int c;
	int nid = -1;

	if (!nnodes)
		return numa_node_id();
	target = (unsigned int)off % nnodes;
	c = 0;
	do {
		nid = next_node(nid, pol->v.nodes);
		c++;
	} while (c <= target);
	return nid;
}

/* Determine a node number for interleave */
static inline unsigned interleave_nid(struct mempolicy *pol,
		 struct vm_area_struct *vma, unsigned long addr, int shift)
{
	if (vma) {
		unsigned long off;

		/*
		 * for small pages, there is no difference between
		 * shift and PAGE_SHIFT, so the bit-shift is safe.
		 * for huge pages, since vm_pgoff is in units of small
		 * pages, we need to shift off the always 0 bits to get
		 * a useful offset.
		 */
		BUG_ON(shift < PAGE_SHIFT);
		off = vma->vm_pgoff >> (shift - PAGE_SHIFT);
		off += (addr - vma->vm_start) >> shift;
		return offset_il_node(pol, vma, off);
	} else
		return interleave_nodes(pol);
}

#ifdef CONFIG_HUGETLBFS

/*
 * huge_zonelist(@vma, @addr, @gfp_flags, @mpol)
 * @vma = virtual memory area whose policy is sought
 * @addr = address in @vma for shared policy lookup and interleave policy
 * @gfp_flags = for requested zone
 * @mpol = pointer to mempolicy pointer for reference counted mempolicy
 * @nodemask = pointer to nodemask pointer for MPOL_BIND nodemask
 *
 * Returns a zonelist suitable for a huge page allocation and a pointer
 * to the struct mempolicy for conditional unref after allocation.
 * If the effective policy is MPOL_BIND, returns a pointer to the mempolicy's
 * @nodemask for filtering the zonelist.
 */
struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
				gfp_t gfp_flags, struct mempolicy **mpol,
				nodemask_t **nodemask)
{
	struct zonelist *zl;

	*mpol = get_vma_policy(current, vma, addr);
	*nodemask = NULL;	/* assume !MPOL_BIND */

	if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) {
		zl = node_zonelist(interleave_nid(*mpol, vma, addr,
				huge_page_shift(hstate_vma(vma))), gfp_flags);
	} else {
		zl = policy_zonelist(gfp_flags, *mpol);
		if ((*mpol)->mode == MPOL_BIND)
			*nodemask = &(*mpol)->v.nodes;
	}
	return zl;
}
#endif

/* Allocate a page in interleaved policy.
   Own path because it needs to do special accounting. */
static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
					unsigned nid)
{
	struct zonelist *zl;
	struct page *page;

	zl = node_zonelist(nid, gfp);
	page = __alloc_pages(gfp, order, zl);
	if (page && page_zone(page) == zonelist_zone(&zl->_zonerefs[0]))
		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
	return page;
}
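/*
 * A small user-space sketch, not kernel code, of the static interleave
 * choice made by offset_il_node()/interleave_nid() above: the VMA offset
 * (scaled by shift - PAGE_SHIFT for huge pages) is reduced modulo the
 * number of allowed nodes, and the allowed set is walked to that position.
 * The allowed node list and its values are made up for the example.
 */
#include <stdio.h>

static unsigned model_offset_il_node(const int *allowed, unsigned nnodes,
				     unsigned long off)
{
	/* kernel: target = off % nnodes, then next_node() target+1 times */
	return (unsigned)allowed[off % nnodes];
}

int main(void)
{
	const int allowed[] = { 0, 2, 5 };	/* hypothetical allowed nodes */
	const unsigned nnodes = 3;

	/* consecutive offsets spread across the allowed nodes in order */
	for (unsigned long off = 0; off < 7; off++)
		printf("offset %lu -> node %u\n", off,
		       model_offset_il_node(allowed, nnodes, off));
	return 0;
}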
/**
 * alloc_page_vma - Allocate a page for a VMA.
 *
 * @gfp:
 *	%GFP_USER    user allocation.
 *	%GFP_KERNEL  kernel allocations,
 *	%GFP_HIGHMEM highmem/user allocations,
 *	%GFP_FS      allocation should not call back into a file system.
 *	%GFP_ATOMIC  don't sleep.
 *
 * @vma:  Pointer to VMA or NULL if not available.
 * @addr: Virtual Address of the allocation. Must be inside the VMA.
 *
 * This function allocates a page from the kernel page pool and applies
 * a NUMA policy associated with the VMA or the current process.
 * When VMA is not NULL caller must hold down_read on the mmap_sem of the
 * mm_struct of the VMA to prevent it from going away.  Should be used for
 * all allocations for pages that will be mapped into user space.
 * Returns NULL when no page can be allocated.
 *
 * Should be called with the mmap_sem of the vma held.
 */
struct page *
alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
{
	struct mempolicy *pol = get_vma_policy(current, vma, addr);
	struct zonelist *zl;

	cpuset_update_task_memory_state();

	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
		unsigned nid;

		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
		mpol_cond_put(pol);
		return alloc_page_interleave(gfp, 0, nid);
	}
	zl = policy_zonelist(gfp, pol);
	if (unlikely(mpol_needs_cond_ref(pol))) {
		/*
		 * slow path: ref counted shared policy
		 */
		struct page *page = __alloc_pages_nodemask(gfp, 0,
					zl, policy_nodemask(gfp, pol));
		__mpol_put(pol);
		return page;
	}
	/*
	 * fast path: default or task policy
	 */
	return __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
}

/**
 * alloc_pages_current - Allocate pages.
 *
 * @gfp:
 *	%GFP_USER    user allocation,
 *	%GFP_KERNEL  kernel allocation,
 *	%GFP_HIGHMEM highmem allocation,
 *	%GFP_FS      don't call back into a file system.
 *	%GFP_ATOMIC  don't sleep.
 * @order: Power of two of allocation size in pages. 0 is a single page.
 *
 * Allocate a page from the kernel page pool.  When not in
 * interrupt context, the current process' NUMA policy is applied.
 * Returns NULL when no page can be allocated.
 *
 * Don't call cpuset_update_task_memory_state() unless
 * 1) it's ok to take cpuset_sem (can WAIT), and
 * 2) allocating for current task (not interrupt).
 */
struct page *alloc_pages_current(gfp_t gfp, unsigned order)
{
	struct mempolicy *pol = current->mempolicy;

	if ((gfp & __GFP_WAIT) && !in_interrupt())
		cpuset_update_task_memory_state();
	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
		pol = &default_policy;

	/*
	 * No reference counting needed for current->mempolicy
	 * nor system default_policy
	 */
	if (pol->mode == MPOL_INTERLEAVE)
		return alloc_page_interleave(gfp, order, interleave_nodes(pol));
	return __alloc_pages_nodemask(gfp, order,
			policy_zonelist(gfp, pol), policy_nodemask(gfp, pol));
}
EXPORT_SYMBOL(alloc_pages_current);

/*
 * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it
 * rebinds the mempolicy it is copying by calling mpol_rebind_policy()
 * with the mems_allowed returned by cpuset_mems_allowed().  This
 * keeps mempolicies cpuset relative after its cpuset moves.  See
 * further kernel/cpuset.c update_nodemask().
 */

/* Slow path of a mempolicy duplicate */
struct mempolicy *__mpol_dup(struct mempolicy *old)
{
	struct mempolicy *new = kmem_cache_alloc(policy_cache, GFP_KERNEL);

	if (!new)
		return ERR_PTR(-ENOMEM);
	if (current_cpuset_is_being_rebound()) {
		nodemask_t mems = cpuset_mems_allowed(current);
		mpol_rebind_policy(old, &mems);
	}
	*new = *old;
	atomic_set(&new->refcnt, 1);
	return new;
}
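/*
 * A minimal, hypothetical kernel-module sketch, not part of mempolicy.c,
 * showing how an ordinary alloc_pages() call reaches alloc_pages_current()
 * above on CONFIG_NUMA kernels, so the calling task's mempolicy (or
 * default_policy) decides the node.  The module name and message text are
 * invented for the example.
 */
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/mm.h>

static struct page *demo_page;

static int __init mempolicy_demo_init(void)
{
	/* routed through alloc_pages_current() when CONFIG_NUMA=y */
	demo_page = alloc_pages(GFP_KERNEL, 0);
	if (!demo_page)
		return -ENOMEM;

	pr_info("mempolicy_demo: page allocated on node %d\n",
		page_to_nid(demo_page));
	return 0;
}

static void __exit mempolicy_demo_exit(void)
{
	__free_pages(demo_page, 0);
}

module_init(mempolicy_demo_init);
module_exit(mempolicy_demo_exit);
MODULE_LICENSE("GPL");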
/*
 * If *frompol needs [has] an extra ref, copy *frompol to *tompol,
 * eliminate the * MPOL_F_* flags that require conditional ref and
 * [NOTE!!!] drop the extra ref.  Not safe to reference *frompol directly
 * after return.  Use the returned value.
 *
 * Allows use of a mempolicy for, e.g., multiple allocations with a single
 * policy lookup, even if the policy needs/has extra ref on lookup.
 * shmem_readahead needs this.
 */
struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
						struct mempolicy *frompol)
{
	if (!mpol_needs_cond_ref(frompol))
		return frompol;

	*tompol = *frompol;
	tompol->flags &= ~MPOL_F_SHARED;	/* copy doesn't need unref */
	__mpol_put(frompol);
	return tompol;
}

static int mpol_match_intent(const struct mempolicy *a,
			     const struct mempolicy *b)
{
	if (a->flags != b->flags)
		return 0;
	if (!mpol_store_user_nodemask(a))
		return 1;
	return nodes_equal(a->w.user_nodemask, b->w.user_nodemask);
}

/* Slow path of a mempolicy comparison */
int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
	if (!a || !b)
		return 0;
	if (a->mode != b->mode)
		return 0;
	if (a->mode != MPOL_DEFAULT && !mpol_match_intent(a, b))
		return 0;
	switch (a->mode) {
	case MPOL_BIND:
		/* Fall through */
	case MPOL_INTERLEAVE:
		return nodes_equal(a->v.nodes, b->v.nodes);
	case MPOL_PREFERRED:
		return a->v.preferred_node == b->v.preferred_node &&
			a->flags == b->flags;
	default:
		BUG();
		return 0;
	}
}

/*
 * Shared memory backing store policy support.
 *
 * Remember policies even when nobody has shared memory mapped.
 * The policies are kept in Red-Black tree linked from the inode.
 * They are protected by the sp->lock spinlock, which should be held
 * for any accesses to the tree.
 */

/* lookup first element intersecting start-end */
/* Caller holds sp->lock */
static struct sp_node *
sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
{
	struct rb_node *n = sp->root.rb_node;

	while (n) {
		struct sp_node *p = rb_entry(n, struct sp_node, nd);

		if (start >= p->end)
			n = n->rb_right;
		else if (end <= p->start)
			n = n->rb_left;
		else
			break;
	}
	if (!n)
		return NULL;
	for (;;) {
		struct sp_node *w = NULL;
		struct rb_node *prev = rb_prev(n);
		if (!prev)
			break;
		w = rb_entry(prev, struct sp_node, nd);
		if (w->end <= start)
			break;
		n = prev;
	}
	return rb_entry(n, struct sp_node, nd);
}

/* Insert a new shared policy into the list. */
/* Caller holds sp->lock */
static void sp_insert(struct shared_policy *sp, struct sp_node *new)
{
	struct rb_node **p = &sp->root.rb_node;
	struct rb_node *parent = NULL;
	struct sp_node *nd;

	while (*p) {
		parent = *p;
		nd = rb_entry(parent, struct sp_node, nd);
		if (new->start < nd->start)
			p = &(*p)->rb_left;
		else if (new->end > nd->end)
			p = &(*p)->rb_right;
		else
			BUG();
	}
	rb_link_node(&new->nd, parent, p);
	rb_insert_color(&new->nd, &sp->root);
	pr_debug("inserting %lx-%lx: %d\n", new->start, new->end,
		 new->policy ? new->policy->mode : 0);
}

/* Find shared policy intersecting idx */
struct mempolicy *
mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)