
📄 mempolicy.c

📁 Latest stable Linux memory management module source code
💻 C
📖 Page 1 of 4
{
	if (p->mempolicy)
		p->flags |= PF_MEMPOLICY;
	else
		p->flags &= ~PF_MEMPOLICY;
}

static void mpol_set_task_struct_flag(void)
{
	mpol_fix_fork_child_flag(current);
}

/* Set the process memory policy */
static long do_set_mempolicy(unsigned short mode, unsigned short flags,
			     nodemask_t *nodes)
{
	struct mempolicy *new;
	struct mm_struct *mm = current->mm;

	new = mpol_new(mode, flags, nodes);
	if (IS_ERR(new))
		return PTR_ERR(new);

	/*
	 * prevent changing our mempolicy while show_numa_maps()
	 * is using it.
	 * Note:  do_set_mempolicy() can be called at init time
	 * with no 'mm'.
	 */
	if (mm)
		down_write(&mm->mmap_sem);
	mpol_put(current->mempolicy);
	current->mempolicy = new;
	mpol_set_task_struct_flag();
	if (new && new->mode == MPOL_INTERLEAVE &&
	    nodes_weight(new->v.nodes))
		current->il_next = first_node(new->v.nodes);
	if (mm)
		up_write(&mm->mmap_sem);

	return 0;
}

/*
 * Return nodemask for policy for get_mempolicy() query
 */
static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
{
	nodes_clear(*nodes);
	if (p == &default_policy)
		return;

	switch (p->mode) {
	case MPOL_BIND:
		/* Fall through */
	case MPOL_INTERLEAVE:
		*nodes = p->v.nodes;
		break;
	case MPOL_PREFERRED:
		if (!(p->flags & MPOL_F_LOCAL))
			node_set(p->v.preferred_node, *nodes);
		/* else return empty node mask for local allocation */
		break;
	default:
		BUG();
	}
}

static int lookup_node(struct mm_struct *mm, unsigned long addr)
{
	struct page *p;
	int err;

	err = get_user_pages(current, mm, addr & PAGE_MASK, 1, 0, 0, &p, NULL);
	if (err >= 0) {
		err = page_to_nid(p);
		put_page(p);
	}
	return err;
}

/* Retrieve NUMA policy */
static long do_get_mempolicy(int *policy, nodemask_t *nmask,
			     unsigned long addr, unsigned long flags)
{
	int err;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = NULL;
	struct mempolicy *pol = current->mempolicy;

	cpuset_update_task_memory_state();
	if (flags &
		~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED))
		return -EINVAL;

	if (flags & MPOL_F_MEMS_ALLOWED) {
		if (flags & (MPOL_F_NODE|MPOL_F_ADDR))
			return -EINVAL;
		*policy = 0;	/* just so it's initialized */
		*nmask  = cpuset_current_mems_allowed;
		return 0;
	}

	if (flags & MPOL_F_ADDR) {
		/*
		 * Do NOT fall back to task policy if the
		 * vma/shared policy at addr is NULL.  We
		 * want to return MPOL_DEFAULT in this case.
		 */
		down_read(&mm->mmap_sem);
		vma = find_vma_intersection(mm, addr, addr+1);
		if (!vma) {
			up_read(&mm->mmap_sem);
			return -EFAULT;
		}
		if (vma->vm_ops && vma->vm_ops->get_policy)
			pol = vma->vm_ops->get_policy(vma, addr);
		else
			pol = vma->vm_policy;
	} else if (addr)
		return -EINVAL;

	if (!pol)
		pol = &default_policy;	/* indicates default behavior */

	if (flags & MPOL_F_NODE) {
		if (flags & MPOL_F_ADDR) {
			err = lookup_node(mm, addr);
			if (err < 0)
				goto out;
			*policy = err;
		} else if (pol == current->mempolicy &&
				pol->mode == MPOL_INTERLEAVE) {
			*policy = current->il_next;
		} else {
			err = -EINVAL;
			goto out;
		}
	} else {
		*policy = pol == &default_policy ? MPOL_DEFAULT :
						pol->mode;
		/*
		 * Internal mempolicy flags must be masked off before exposing
		 * the policy to userspace.
		 */
		*policy |= (pol->flags & MPOL_MODE_FLAGS);
	}

	if (vma) {
		up_read(&current->mm->mmap_sem);
		vma = NULL;
	}

	err = 0;
	if (nmask)
		get_policy_nodemask(pol, nmask);

 out:
	mpol_cond_put(pol);
	if (vma)
		up_read(&current->mm->mmap_sem);
	return err;
}
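/*
 * Illustrative userspace sketch (not part of mempolicy.c): do_get_mempolicy()
 * above backs the get_mempolicy(2) syscall.  A minimal caller, assuming the
 * libnuma <numaif.h> wrapper is available (link with -lnuma); the helper name
 * show_node_of() is hypothetical:
 */
#if 0	/* example only -- userspace code, never compiled with the kernel */
#include <numaif.h>
#include <stdio.h>

static void show_node_of(void *addr)
{
	int node = -1;

	/* MPOL_F_NODE|MPOL_F_ADDR asks for the node backing 'addr' */
	if (get_mempolicy(&node, NULL, 0, addr, MPOL_F_NODE | MPOL_F_ADDR) == 0)
		printf("%p resides on node %d\n", addr, node);
	else
		perror("get_mempolicy");
}
#endif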
#ifdef CONFIG_MIGRATION
/*
 * page migration
 */
static void migrate_page_add(struct page *page, struct list_head *pagelist,
				unsigned long flags)
{
	/*
	 * Avoid migrating a page that is shared with others.
	 */
	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
		if (!isolate_lru_page(page)) {
			list_add_tail(&page->lru, pagelist);
		}
	}
}

static struct page *new_node_page(struct page *page, unsigned long node, int **x)
{
	return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0);
}

/*
 * Migrate pages from one node to a target node.
 * Returns error or the number of pages not migrated.
 */
static int migrate_to_node(struct mm_struct *mm, int source, int dest,
			   int flags)
{
	nodemask_t nmask;
	LIST_HEAD(pagelist);
	int err = 0;

	nodes_clear(nmask);
	node_set(source, nmask);
	check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nmask,
			flags | MPOL_MF_DISCONTIG_OK, &pagelist);

	if (!list_empty(&pagelist))
		err = migrate_pages(&pagelist, new_node_page, dest);

	return err;
}

/*
 * Move pages between the two nodesets so as to preserve the physical
 * layout as much as possible.
 *
 * Returns the number of pages that could not be moved.
 */
int do_migrate_pages(struct mm_struct *mm,
	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
{
	int busy = 0;
	int err;
	nodemask_t tmp;

	err = migrate_prep();
	if (err)
		return err;

	down_read(&mm->mmap_sem);

	err = migrate_vmas(mm, from_nodes, to_nodes, flags);
	if (err)
		goto out;

/*
 * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
 * bit in 'to' is not also set in 'tmp'.  Clear the found 'source'
 * bit in 'tmp', and return that <source, dest> pair for migration.
 * The pair of nodemasks 'to' and 'from' define the map.
 *
 * If no pair of bits is found that way, fallback to picking some
 * pair of 'source' and 'dest' bits that are not the same.  If the
 * 'source' and 'dest' bits are the same, this represents a node
 * that will be migrating to itself, so no pages need move.
 *
 * If no bits are left in 'tmp', or if all remaining bits left
 * in 'tmp' correspond to the same bit in 'to', return false
 * (nothing left to migrate).
 *
 * This lets us pick a pair of nodes to migrate between, such that
 * if possible the dest node is not already occupied by some other
 * source node, minimizing the risk of overloading the memory on a
 * node that would happen if we migrated incoming memory to a node
 * before migrating outgoing memory source that same node.
 *
 * A single scan of tmp is sufficient.  As we go, we remember the
 * most recent <s, d> pair that moved (s != d).  If we find a pair
 * that not only moved, but what's better, moved to an empty slot
 * (d is not set in tmp), then we break out then, with that pair.
 * Otherwise when we finish scanning from_tmp, we at least have the
 * most recent <s, d> pair that moved.  If we get all the way through
 * the scan of tmp without finding any node that moved, much less
 * moved to an empty node, then there is nothing left worth migrating.
 */

	tmp = *from_nodes;
	while (!nodes_empty(tmp)) {
		int s,d;
		int source = -1;
		int dest = 0;

		for_each_node_mask(s, tmp) {
			d = node_remap(s, *from_nodes, *to_nodes);
			if (s == d)
				continue;

			source = s;	/* Node moved. Memorize */
			dest = d;

			/* dest not in remaining from nodes? */
			if (!node_isset(dest, tmp))
				break;
		}
		if (source == -1)
			break;

		node_clear(source, tmp);
		err = migrate_to_node(mm, source, dest, flags);
		if (err > 0)
			busy += err;
		if (err < 0)
			break;
	}
out:
	up_read(&mm->mmap_sem);
	if (err < 0)
		return err;
	return busy;
}
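/*
 * Illustrative trace (not part of mempolicy.c): with from_nodes = {0,1}
 * and to_nodes = {1,2}, node_remap() maps 0 -> 1 and 1 -> 2.  The scan
 * above prefers the pair <1,2> because node 2 is not in the remaining
 * source set (an "empty slot"), so node 1 is drained to node 2 before
 * node 0's pages are moved onto node 1.  Doing the 0 -> 1 move first
 * would have temporarily stacked two nodes' worth of memory on node 1.
 */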
/*
 * Allocate a new page for page migration based on vma policy.
 * Start assuming that page is mapped by vma pointed to by @private.
 * Search forward from there, if not.  N.B., this assumes that the
 * list of pages handed to migrate_pages()--which is how we get here--
 * is in virtual address order.
 */
static struct page *new_vma_page(struct page *page, unsigned long private, int **x)
{
	struct vm_area_struct *vma = (struct vm_area_struct *)private;
	unsigned long uninitialized_var(address);

	while (vma) {
		address = page_address_in_vma(page, vma);
		if (address != -EFAULT)
			break;
		vma = vma->vm_next;
	}

	/*
	 * if !vma, alloc_page_vma() will use task or system default policy
	 */
	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
}
#else

static void migrate_page_add(struct page *page, struct list_head *pagelist,
				unsigned long flags)
{
}

int do_migrate_pages(struct mm_struct *mm,
	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
{
	return -ENOSYS;
}

static struct page *new_vma_page(struct page *page, unsigned long private, int **x)
{
	return NULL;
}
#endif

static long do_mbind(unsigned long start, unsigned long len,
		     unsigned short mode, unsigned short mode_flags,
		     nodemask_t *nmask, unsigned long flags)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	struct mempolicy *new;
	unsigned long end;
	int err;
	LIST_HEAD(pagelist);

	if (flags & ~(unsigned long)(MPOL_MF_STRICT |
				     MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
		return -EINVAL;
	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
		return -EPERM;

	if (start & ~PAGE_MASK)
		return -EINVAL;

	if (mode == MPOL_DEFAULT)
		flags &= ~MPOL_MF_STRICT;

	len = (len + PAGE_SIZE - 1) & PAGE_MASK;
	end = start + len;

	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;

	new = mpol_new(mode, mode_flags, nmask);
	if (IS_ERR(new))
		return PTR_ERR(new);

	/*
	 * If we are using the default policy then operation
	 * on discontinuous address spaces is okay after all
	 */
	if (!new)
		flags |= MPOL_MF_DISCONTIG_OK;

	pr_debug("mbind %lx-%lx mode:%d flags:%d nodes:%lx\n",
		 start, start + len, mode, mode_flags,
		 nmask ? nodes_addr(*nmask)[0] : -1);

	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {

		err = migrate_prep();
		if (err)
			return err;
	}
	down_write(&mm->mmap_sem);
	vma = check_range(mm, start, end, nmask,
			  flags | MPOL_MF_INVERT, &pagelist);

	err = PTR_ERR(vma);
	if (!IS_ERR(vma)) {
		int nr_failed = 0;

		err = mbind_range(vma, start, end, new);

		if (!list_empty(&pagelist))
			nr_failed = migrate_pages(&pagelist, new_vma_page,
						(unsigned long)vma);

		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
			err = -EIO;
	}

	up_write(&mm->mmap_sem);
	mpol_put(new);
	return err;
}
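/*
 * Illustrative userspace sketch (not part of mempolicy.c): do_mbind() above
 * services the mbind(2) syscall.  A minimal caller, assuming the libnuma
 * <numaif.h> wrapper (link with -lnuma); the helper name alloc_on_node0()
 * is hypothetical.  It binds a fresh anonymous mapping to node 0 and asks
 * for strict enforcement plus migration of any misplaced pages:
 */
#if 0	/* example only -- userspace code, never compiled with the kernel */
#include <numaif.h>
#include <sys/mman.h>
#include <stdio.h>

static void *alloc_on_node0(size_t len)
{
	unsigned long nodemask = 1UL << 0;	/* node 0 only */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return NULL;
	if (mbind(p, len, MPOL_BIND, &nodemask,
		  sizeof(nodemask) * 8, MPOL_MF_STRICT | MPOL_MF_MOVE)) {
		perror("mbind");
		munmap(p, len);
		return NULL;
	}
	return p;
}
#endif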
/*
 * User space interface with variable sized bitmaps for nodelists.
 */

/* Copy a node mask from user space. */
static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
		     unsigned long maxnode)
{
	unsigned long k;
	unsigned long nlongs;
	unsigned long endmask;

	--maxnode;
	nodes_clear(*nodes);
	if (maxnode == 0 || !nmask)
		return 0;
	if (maxnode > PAGE_SIZE*BITS_PER_BYTE)
		return -EINVAL;

	nlongs = BITS_TO_LONGS(maxnode);
	if ((maxnode % BITS_PER_LONG) == 0)
		endmask = ~0UL;
	else
		endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1;

	/* When the user specified more nodes than supported just check
	   if the non supported part is all zero. */
	if (nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
		if (nlongs > PAGE_SIZE/sizeof(long))
			return -EINVAL;
		for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) {
			unsigned long t;
			if (get_user(t, nmask + k))
				return -EFAULT;
			if (k == nlongs - 1) {
				if (t & endmask)
					return -EINVAL;
			} else if (t)
				return -EINVAL;
		}
		nlongs = BITS_TO_LONGS(MAX_NUMNODES);
		endmask = ~0UL;
	}

	if (copy_from_user(nodes_addr(*nodes), nmask, nlongs*sizeof(unsigned long)))
		return -EFAULT;
	nodes_addr(*nodes)[nlongs-1] &= endmask;
	return 0;
}

/* Copy a kernel node mask to user space */
static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
			      nodemask_t *nodes)
{
	unsigned long copy = ALIGN(maxnode-1, 64) / 8;
	const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long);

	if (copy > nbytes) {
		if (copy > PAGE_SIZE)
			return -EINVAL;
		if (clear_user((char __user *)mask + nbytes, copy - nbytes))
			return -EFAULT;
		copy = nbytes;
	}
	return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0;
}

SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
		unsigned long, mode, unsigned long __user *, nmask,
		unsigned long, maxnode, unsigned, flags)
{
	nodemask_t nodes;
	int err;
	unsigned short mode_flags;

	mode_flags = mode & MPOL_MODE_FLAGS;
	mode &= ~MPOL_MODE_FLAGS;
	if (mode >= MPOL_MAX)
		return -EINVAL;
	if ((mode_flags & MPOL_F_STATIC_NODES) &&
	    (mode_flags & MPOL_F_RELATIVE_NODES))
		return -EINVAL;
	err = get_nodes(&nodes, nmask, maxnode);
	if (err)
		return err;
	return do_mbind(start, len, mode, mode_flags, &nodes, flags);
}

/* Set the process memory policy */
SYSCALL_DEFINE3(set_mempolicy, int, mode, unsigned long __user *, nmask,
		unsigned long, maxnode)
{
	int err;
	nodemask_t nodes;
	unsigned short flags;

	flags = mode & MPOL_MODE_FLAGS;
	mode &= ~MPOL_MODE_FLAGS;
	if ((unsigned int)mode >= MPOL_MAX)
		return -EINVAL;
	if ((flags & MPOL_F_STATIC_NODES) && (flags & MPOL_F_RELATIVE_NODES))
		return -EINVAL;
	err = get_nodes(&nodes, nmask, maxnode);
	if (err)
		return err;
	return do_set_mempolicy(mode, flags, &nodes);
}
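/*
 * Illustrative userspace sketch (not part of mempolicy.c): the syscall above
 * is what a task-wide interleave policy boils down to.  Assuming the libnuma
 * <numaif.h> wrapper (link with -lnuma); the helper name is hypothetical.
 * It interleaves all of the caller's future allocations across nodes 0 and 1:
 */
#if 0	/* example only -- userspace code, never compiled with the kernel */
#include <numaif.h>
#include <stdio.h>

static int interleave_on_nodes_0_and_1(void)
{
	unsigned long nodemask = (1UL << 0) | (1UL << 1);

	/* mode may also carry MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES */
	if (set_mempolicy(MPOL_INTERLEAVE, &nodemask, sizeof(nodemask) * 8)) {
		perror("set_mempolicy");
		return -1;
	}
	return 0;
}
#endif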
SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
		const unsigned long __user *, old_nodes,
		const unsigned long __user *, new_nodes)
{
	const struct cred *cred = current_cred(), *tcred;
	struct mm_struct *mm;
	struct task_struct *task;
	nodemask_t old;
	nodemask_t new;
	nodemask_t task_nodes;
	int err;

	err = get_nodes(&old, old_nodes, maxnode);
	if (err)
		return err;

	err = get_nodes(&new, new_nodes, maxnode);
	if (err)
		return err;

	/* Find the mm_struct */
	read_lock(&tasklist_lock);
	task = pid ? find_task_by_vpid(pid) : current;
	if (!task) {
		read_unlock(&tasklist_lock);
		return -ESRCH;
	}
	mm = get_task_mm(task);
	read_unlock(&tasklist_lock);

	if (!mm)
		return -EINVAL;

	/*
	 * Check if this process has the right to modify the specified
	 * process. The right exists if the process has administrative
	 * capabilities, superuser privileges or the same
	 * userid as the target process.
	 */
	rcu_read_lock();
	tcred = __task_cred(task);
	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
	    !capable(CAP_SYS_NICE)) {
		rcu_read_unlock();
		err = -EPERM;
		goto out;
	}
	rcu_read_unlock();

	task_nodes = cpuset_mems_allowed(task);
	/* Is the user allowed to access the target nodes? */
	if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_NICE)) {
		err = -EPERM;
		goto out;
	}

	if (!nodes_subset(new, node_states[N_HIGH_MEMORY])) {
		err = -EINVAL;
		goto out;
	}
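	/*
	 * Illustrative note (not part of mempolicy.c): from userspace this
	 * path is reached via migrate_pages(2), e.g. the libnuma wrapper
	 *	migrate_pages(pid, sizeof(unsigned long) * 8, &old_mask, &new_mask);
	 * and, as the checks above show, the caller needs either CAP_SYS_NICE
	 * or the same user id as the target task, otherwise the call fails
	 * with EPERM.  The function continues on the next page of this listing.
	 */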
