📄 memcontrol.c
unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
					struct list_head *dst,
					unsigned long *scanned, int order,
					int mode, struct zone *z,
					struct mem_cgroup *mem_cont,
					int active, int file)
{
	unsigned long nr_taken = 0;
	struct page *page;
	unsigned long scan;
	LIST_HEAD(pc_list);
	struct list_head *src;
	struct page_cgroup *pc, *tmp;
	int nid = z->zone_pgdat->node_id;
	int zid = zone_idx(z);
	struct mem_cgroup_per_zone *mz;
	int lru = LRU_FILE * !!file + !!active;

	BUG_ON(!mem_cont);
	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
	src = &mz->lists[lru];

	scan = 0;
	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
		if (scan >= nr_to_scan)
			break;

		page = pc->page;
		if (unlikely(!PageCgroupUsed(pc)))
			continue;
		if (unlikely(!PageLRU(page)))
			continue;

		scan++;
		if (__isolate_lru_page(page, mode, file) == 0) {
			list_move(&page->lru, dst);
			nr_taken++;
		}
	}

	*scanned = scan;
	return nr_taken;
}

#define mem_cgroup_from_res_counter(counter, member)	\
	container_of(counter, struct mem_cgroup, member)

/*
 * This routine finds the DFS walk successor. This routine should be
 * called with hierarchy_mutex held
 */
static struct mem_cgroup *
__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
{
	struct cgroup *cgroup, *curr_cgroup, *root_cgroup;

	curr_cgroup = curr->css.cgroup;
	root_cgroup = root_mem->css.cgroup;

	if (!list_empty(&curr_cgroup->children)) {
		/*
		 * Walk down to children
		 */
		cgroup = list_entry(curr_cgroup->children.next,
						struct cgroup, sibling);
		curr = mem_cgroup_from_cont(cgroup);
		goto done;
	}

visit_parent:
	if (curr_cgroup == root_cgroup) {
		/* caller handles NULL case */
		curr = NULL;
		goto done;
	}

	/*
	 * Goto next sibling
	 */
	if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
		cgroup = list_entry(curr_cgroup->sibling.next,
						struct cgroup, sibling);
		curr = mem_cgroup_from_cont(cgroup);
		goto done;
	}

	/*
	 * Go up to next parent and next parent's sibling if need be
	 */
	curr_cgroup = curr_cgroup->parent;
	goto visit_parent;

done:
	return curr;
}

/*
 * Visit the first child (need not be the first child as per the ordering
 * of the cgroup list, since we track last_scanned_child) of @mem and use
 * that to reclaim free pages from.
 */
static struct mem_cgroup *
mem_cgroup_get_next_node(struct mem_cgroup *root_mem)
{
	struct cgroup *cgroup;
	struct mem_cgroup *orig, *next;
	bool obsolete;

	/*
	 * Scan all children under the mem_cgroup mem
	 */
	mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);

	orig = root_mem->last_scanned_child;
	obsolete = mem_cgroup_is_obsolete(orig);

	if (list_empty(&root_mem->css.cgroup->children)) {
		/*
		 * root_mem might have children before and last_scanned_child
		 * may point to one of them. We put it later.
		 */
		if (orig)
			VM_BUG_ON(!obsolete);
		next = NULL;
		goto done;
	}

	if (!orig || obsolete) {
		cgroup = list_first_entry(&root_mem->css.cgroup->children,
				struct cgroup, sibling);
		next = mem_cgroup_from_cont(cgroup);
	} else
		next = __mem_cgroup_get_next_node(orig, root_mem);

done:
	if (next)
		mem_cgroup_get(next);
	root_mem->last_scanned_child = next;
	if (orig)
		mem_cgroup_put(orig);
	mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
	return (next) ? next : root_mem;
}

static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
{
	if (do_swap_account) {
		if (res_counter_check_under_limit(&mem->res) &&
			res_counter_check_under_limit(&mem->memsw))
			return true;
	} else
		if (res_counter_check_under_limit(&mem->res))
			return true;
	return false;
}

static unsigned int get_swappiness(struct mem_cgroup *memcg)
{
	struct cgroup *cgrp = memcg->css.cgroup;
	unsigned int swappiness;

	/* root ? */
	if (cgrp->parent == NULL)
		return vm_swappiness;

	spin_lock(&memcg->reclaim_param_lock);
	swappiness = memcg->swappiness;
	spin_unlock(&memcg->reclaim_param_lock);

	return swappiness;
}

/*
 * Dance down the hierarchy if needed to reclaim memory. We remember the
 * last child we reclaimed from, so that we don't end up penalizing
 * one child extensively based on its position in the children list.
 *
 * root_mem is the original ancestor that we've been reclaiming from.
 */
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
						gfp_t gfp_mask, bool noswap)
{
	struct mem_cgroup *next_mem;
	int ret = 0;

	/*
	 * Reclaim unconditionally and don't check for return value.
	 * We need to reclaim in the current group and down the tree.
	 * One might think about checking for children before reclaiming,
	 * but there might be left over accounting, even after children
	 * have left.
	 */
	ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
					   get_swappiness(root_mem));
	if (mem_cgroup_check_under_limit(root_mem))
		return 1;	/* indicate reclaim has succeeded */
	if (!root_mem->use_hierarchy)
		return ret;

	next_mem = mem_cgroup_get_next_node(root_mem);

	while (next_mem != root_mem) {
		if (mem_cgroup_is_obsolete(next_mem)) {
			next_mem = mem_cgroup_get_next_node(root_mem);
			continue;
		}
		ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
						   get_swappiness(next_mem));
		if (mem_cgroup_check_under_limit(root_mem))
			return 1;	/* indicate reclaim has succeeded */
		next_mem = mem_cgroup_get_next_node(root_mem);
	}
	return ret;
}

bool mem_cgroup_oom_called(struct task_struct *task)
{
	bool ret = false;
	struct mem_cgroup *mem;
	struct mm_struct *mm;

	rcu_read_lock();
	mm = task->mm;
	if (!mm)
		mm = &init_mm;
	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
	if (mem && time_before(jiffies, mem->last_oom_jiffies + HZ/10))
		ret = true;
	rcu_read_unlock();
	return ret;
}

/*
 * Unlike the exported interface, an "oom" parameter is added. If oom==true,
 * the oom-killer can be invoked.
 */
static int __mem_cgroup_try_charge(struct mm_struct *mm,
			gfp_t gfp_mask, struct mem_cgroup **memcg,
			bool oom)
{
	struct mem_cgroup *mem, *mem_over_limit;
	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
	struct res_counter *fail_res;

	if (unlikely(test_thread_flag(TIF_MEMDIE))) {
		/* Don't account this! */
		*memcg = NULL;
		return 0;
	}

	/*
	 * We always charge the cgroup the mm_struct belongs to.
	 * The mm_struct's mem_cgroup changes on task migration if the
	 * thread group leader migrates. It's possible that mm is not
	 * set, if so charge the init_mm (happens for pagecache usage).
	 */
	mem = *memcg;
	if (likely(!mem)) {
		mem = try_get_mem_cgroup_from_mm(mm);
		*memcg = mem;
	} else {
		css_get(&mem->css);
	}
	if (unlikely(!mem))
		return 0;

	VM_BUG_ON(mem_cgroup_is_obsolete(mem));

	while (1) {
		int ret;
		bool noswap = false;

		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
		if (likely(!ret)) {
			if (!do_swap_account)
				break;
			ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
							&fail_res);
			if (likely(!ret))
				break;
			/* mem+swap counter fails */
			res_counter_uncharge(&mem->res, PAGE_SIZE);
			noswap = true;
			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
									memsw);
		} else
			/* mem counter fails */
			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
									res);

		if (!(gfp_mask & __GFP_WAIT))
			goto nomem;

		ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
							noswap);
		if (ret)
			continue;

		/*
		 * try_to_free_mem_cgroup_pages() might not give us a full
		 * picture of reclaim. Some pages are reclaimed and might be
		 * moved to swap cache or just unmapped from the cgroup.
		 * Check the limit again to see if the reclaim reduced the
		 * current usage of the cgroup before giving up.
		 */
		if (mem_cgroup_check_under_limit(mem_over_limit))
			continue;

		if (!nr_retries--) {
			if (oom) {
				mutex_lock(&memcg_tasklist);
				mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
				mutex_unlock(&memcg_tasklist);
				mem_over_limit->last_oom_jiffies = jiffies;
			}
			goto nomem;
		}
	}
	return 0;
nomem:
	css_put(&mem->css);
	return -ENOMEM;
}

static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
{
	struct mem_cgroup *mem;
	swp_entry_t ent;

	if (!PageSwapCache(page))
		return NULL;

	ent.val = page_private(page);
	mem = lookup_swap_cgroup(ent);
	if (!mem)
		return NULL;
	if (!css_tryget(&mem->css))
		return NULL;
	return mem;
}

/*
 * commit a charge got by __mem_cgroup_try_charge() and make the page_cgroup
 * enter the USED state. If already USED, uncharge and return.
 */
static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
				     struct page_cgroup *pc,
				     enum charge_type ctype)
{
	/* try_charge() can return NULL to *memcg, taking care of it. */
	if (!mem)
		return;

	lock_page_cgroup(pc);
	if (unlikely(PageCgroupUsed(pc))) {
		unlock_page_cgroup(pc);
		res_counter_uncharge(&mem->res, PAGE_SIZE);
		if (do_swap_account)
			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
		css_put(&mem->css);
		return;
	}
	pc->mem_cgroup = mem;
	smp_wmb();
	pc->flags = pcg_default_flags[ctype];

	mem_cgroup_charge_statistics(mem, pc, true);

	unlock_page_cgroup(pc);
}

/**
 * mem_cgroup_move_account - move account of the page
 * @pc:	page_cgroup of the page.
 * @from: mem_cgroup which the page is moved from.
 * @to:	mem_cgroup which the page is moved to. @from != @to.
 *
 * The caller must confirm the following.
 * - page is not on LRU (isolate_page() is useful.)
 *
 * returns 0 on success,
 * returns -EBUSY when lock is busy or "pc" is unstable.
 *
 * This function does "uncharge" from the old cgroup but doesn't do "charge"
 * to the new cgroup. That should be done by the caller.
 */
static int mem_cgroup_move_account(struct page_cgroup *pc,
	struct mem_cgroup *from, struct mem_cgroup *to)
{
	struct mem_cgroup_per_zone *from_mz, *to_mz;
	int nid, zid;
	int ret = -EBUSY;

	VM_BUG_ON(from == to);
	VM_BUG_ON(PageLRU(pc->page));

	nid = page_cgroup_nid(pc);
	zid = page_cgroup_zid(pc);
	from_mz = mem_cgroup_zoneinfo(from, nid, zid);
	to_mz = mem_cgroup_zoneinfo(to, nid, zid);

	if (!trylock_page_cgroup(pc))
		return ret;

	if (!PageCgroupUsed(pc))
		goto out;

	if (pc->mem_cgroup != from)
		goto out;

	res_counter_uncharge(&from->res, PAGE_SIZE);
	mem_cgroup_charge_statistics(from, pc, false);
	if (do_swap_account)
		res_counter_uncharge(&from->memsw, PAGE_SIZE);
	css_put(&from->css);

	css_get(&to->css);
	pc->mem_cgroup = to;
	mem_cgroup_charge_statistics(to, pc, true);
	ret = 0;
out:
	unlock_page_cgroup(pc);
	return ret;
}

/*
 * move charges to its parent.
 */
static int mem_cgroup_move_parent(struct page_cgroup *pc,
				  struct mem_cgroup *child,
				  gfp_t gfp_mask)
{
	struct page *page = pc->page;
	struct cgroup *cg = child->css.cgroup;
	struct cgroup *pcg = cg->parent;
	struct mem_cgroup *parent;
	int ret;

	/* Is ROOT ? */
	if (!pcg)
		return -EINVAL;

	parent = mem_cgroup_from_cont(pcg);

	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
	if (ret || !parent)
		return ret;

	if (!get_page_unless_zero(page)) {
		ret = -EBUSY;
		goto uncharge;
	}

	ret = isolate_lru_page(page);

	if (ret)
		goto cancel;

	ret = mem_cgroup_move_account(pc, child, parent);

	putback_lru_page(page);
	if (!ret) {
		put_page(page);
		/* drop extra refcnt by try_charge() */
		css_put(&parent->css);
		return 0;
	}

cancel:
	put_page(page);
uncharge:
	/* drop extra refcnt by try_charge() */
	css_put(&parent->css);
	/* uncharge if move fails */
	res_counter_uncharge(&parent->res, PAGE_SIZE);
	if (do_swap_account)
		res_counter_uncharge(&parent->memsw, PAGE_SIZE);
	return ret;
}

/*
 * Charge the memory controller for page usage.
 * Return
 * 0 if the charge was successful
 * < 0 if the cgroup is over its limit
 */
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
				gfp_t gfp_mask, enum charge_type ctype,
				struct mem_cgroup *memcg)
{
	struct mem_cgroup *mem;
	struct page_cgroup *pc;
	int ret;

	pc = lookup_page_cgroup(page);
	/* can happen at boot */
	if (unlikely(!pc))
		return 0;
	prefetchw(pc);

	mem = memcg;
	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
	if (ret || !mem)
		return ret;

	__mem_cgroup_commit_charge(mem, pc, ctype);
	return 0;
}

int mem_cgroup_newpage_charge(struct page *page,
			      struct mm_struct *mm, gfp_t gfp_mask)
{
	if (mem_cgroup_disabled())
		return 0;
	if (PageCompound(page))
		return 0;
	/*
	 * If already mapped, we don't have to account.
	 * If page cache, page->mapping has address_space.
	 * But page->mapping may have an out-of-use anon_vma pointer;
	 * detect it by the PageAnon() check: a newly-mapped-anon page's
	 * page->mapping is NULL.
	 */
	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
		return 0;
	if (unlikely(!mm))
		mm = &init_mm;
	return mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
}

int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
				gfp_t gfp_mask)
{
	struct mem_cgroup *mem = NULL;
	int ret;

	if (mem_cgroup_disabled())
		return 0;
	if (PageCompound(page))
		return 0;
	/*
	 * Corner case handling. This is usually called from
	 * add_to_page_cache(), but some filesystems (shmem) precharge the
	 * page before calling it and then call add_to_page_cache() with
	 * GFP_NOWAIT.
	 *
	 * For the GFP_NOWAIT case, the page may be pre-charged before calling
	 * add_to_page_cache(). (See shmem.c.) Check it here and avoid
	 * charging twice. (It works but has to pay a bit larger cost.)
	 * And when the page is SwapCache, it should take swap information
	 * into account. This is under lock_page() now.
	 */
	if (!(gfp_mask & __GFP_WAIT)) {
		struct page_cgroup *pc;

		pc = lookup_page_cgroup(page);
		if (!pc)
			return 0;
		lock_page_cgroup(pc);
		if (PageCgroupUsed(pc)) {
			unlock_page_cgroup(pc);
			return 0;
		}
		unlock_page_cgroup(pc);
	}

	if (do_swap_account && PageSwapCache(page)) {
		mem = try_get_mem_cgroup_from_swapcache(page);
		if (mem)
			mm = NULL;
		else
			mem = NULL;
		/* SwapCache may be still linked to LRU now. */
		mem_cgroup_lru_del_before_commit_swapcache(page);
	}

	if (unlikely(!mm && !mem))
		mm = &init_mm;

	if (page_is_file_cache(page))
		return mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);