memcontrol.c
	ret = mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
	if (mem)
		css_put(&mem->css);
	if (PageSwapCache(page))
		mem_cgroup_lru_add_after_commit_swapcache(page);

	if (do_swap_account && !ret && PageSwapCache(page)) {
		swp_entry_t ent = {.val = page_private(page)};
		/* avoid double counting */
		mem = swap_cgroup_record(ent, NULL);
		if (mem) {
			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
			mem_cgroup_put(mem);
		}
	}
	return ret;
}

/*
 * While swap-in, try_charge -> commit or cancel, the page is locked.
 * And when try_charge() successfully returns, one refcnt to memcg without
 * struct page_cgroup is acquired. This refcnt will be consumed by
 * "commit()" or removed by "cancel()".
 */
int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
				 struct page *page,
				 gfp_t mask, struct mem_cgroup **ptr)
{
	struct mem_cgroup *mem;
	int ret;

	if (mem_cgroup_disabled())
		return 0;

	if (!do_swap_account)
		goto charge_cur_mm;
	/*
	 * A racing thread's fault, or swapoff, may have already updated
	 * the pte, and even removed page from swap cache: return success
	 * to go on to do_swap_page()'s pte_same() test, which should fail.
	 */
	if (!PageSwapCache(page))
		return 0;
	mem = try_get_mem_cgroup_from_swapcache(page);
	if (!mem)
		goto charge_cur_mm;
	*ptr = mem;
	ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
	/* drop extra refcnt from tryget */
	css_put(&mem->css);
	return ret;
charge_cur_mm:
	if (unlikely(!mm))
		mm = &init_mm;
	return __mem_cgroup_try_charge(mm, mask, ptr, true);
}

void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
{
	struct page_cgroup *pc;

	if (mem_cgroup_disabled())
		return;
	if (!ptr)
		return;
	pc = lookup_page_cgroup(page);
	mem_cgroup_lru_del_before_commit_swapcache(page);
	__mem_cgroup_commit_charge(ptr, pc, MEM_CGROUP_CHARGE_TYPE_MAPPED);
	mem_cgroup_lru_add_after_commit_swapcache(page);
	/*
	 * Now swap is on-memory. This means this page may be
	 * counted both as mem and swap....double count.
	 * Fix it by uncharging from memsw. Basically, this SwapCache is stable
	 * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
	 * may call delete_from_swap_cache() before we reach here.
	 */
	if (do_swap_account && PageSwapCache(page)) {
		swp_entry_t ent = {.val = page_private(page)};
		struct mem_cgroup *memcg;

		/* avoid double counting */
		memcg = swap_cgroup_record(ent, NULL);
		if (memcg) {
			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
			mem_cgroup_put(memcg);
		}
	}
	/* add this page(page_cgroup) to the LRU we want. */
}
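/*
 * Illustrative sketch, not part of memcontrol.c: how a do_swap_page()-style
 * caller is expected to drive the try_charge/commit/cancel protocol above.
 * The function name and the pte_changed flag are hypothetical placeholders;
 * the real caller in mm/memory.c also handles page locking, the pte_same()
 * check and map counts.
 */
static int swapin_charge_sketch(struct mm_struct *mm, struct page *page,
				gfp_t gfp_mask, bool pte_changed)
{
	struct mem_cgroup *ptr = NULL;
	int ret;

	/* reserve one page of charge; may charge the swap entry's old owner */
	ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &ptr);
	if (ret)
		return ret;

	if (pte_changed) {
		/* lost a race: drop the reservation and the memcg refcount */
		mem_cgroup_cancel_charge_swapin(ptr);
		return 0;
	}

	/* success: bind the reserved charge to this page's page_cgroup */
	mem_cgroup_commit_charge_swapin(page, ptr);
	return 0;
}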
void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
{
	if (mem_cgroup_disabled())
		return;
	if (!mem)
		return;
	res_counter_uncharge(&mem->res, PAGE_SIZE);
	if (do_swap_account)
		res_counter_uncharge(&mem->memsw, PAGE_SIZE);
	css_put(&mem->css);
}

/*
 * uncharge if !page_mapped(page)
 */
static struct mem_cgroup *
__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
{
	struct page_cgroup *pc;
	struct mem_cgroup *mem = NULL;
	struct mem_cgroup_per_zone *mz;

	if (mem_cgroup_disabled())
		return NULL;

	if (PageSwapCache(page))
		return NULL;

	/*
	 * Check if our page_cgroup is valid
	 */
	pc = lookup_page_cgroup(page);
	if (unlikely(!pc || !PageCgroupUsed(pc)))
		return NULL;

	lock_page_cgroup(pc);

	mem = pc->mem_cgroup;

	if (!PageCgroupUsed(pc))
		goto unlock_out;

	switch (ctype) {
	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
		if (page_mapped(page))
			goto unlock_out;
		break;
	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
		if (!PageAnon(page)) {	/* Shared memory */
			if (page->mapping && !page_is_file_cache(page))
				goto unlock_out;
		} else if (page_mapped(page)) /* Anon */
			goto unlock_out;
		break;
	default:
		break;
	}

	res_counter_uncharge(&mem->res, PAGE_SIZE);
	if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
		res_counter_uncharge(&mem->memsw, PAGE_SIZE);

	mem_cgroup_charge_statistics(mem, pc, false);
	ClearPageCgroupUsed(pc);
	/*
	 * pc->mem_cgroup is not cleared here. It will be accessed when it's
	 * freed from the LRU. This is safe because an uncharged page is
	 * expected not to be reused (freed soon). The exception is SwapCache,
	 * which is handled by special functions.
	 */

	mz = page_cgroup_zoneinfo(pc);
	unlock_page_cgroup(pc);

	/* at swapout, this memcg will be accessed to record to swap */
	if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
		css_put(&mem->css);

	return mem;

unlock_out:
	unlock_page_cgroup(pc);
	return NULL;
}

void mem_cgroup_uncharge_page(struct page *page)
{
	/* early check. */
	if (page_mapped(page))
		return;
	if (page->mapping && !PageAnon(page))
		return;
	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
}

void mem_cgroup_uncharge_cache_page(struct page *page)
{
	VM_BUG_ON(page_mapped(page));
	VM_BUG_ON(page->mapping);
	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
}

/*
 * called from __delete_from_swap_cache(); drops the "page" account.
 * memcg information is recorded to the swap_cgroup of "ent".
 */
void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
{
	struct mem_cgroup *memcg;

	memcg = __mem_cgroup_uncharge_common(page,
					MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
	/* record memcg information */
	if (do_swap_account && memcg) {
		swap_cgroup_record(ent, memcg);
		mem_cgroup_get(memcg);
	}
	if (memcg)
		css_put(&memcg->css);
}

#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/*
 * called from swap_entry_free(); removes the record in swap_cgroup and
 * uncharges the "memsw" account.
 */
void mem_cgroup_uncharge_swap(swp_entry_t ent)
{
	struct mem_cgroup *memcg;

	if (!do_swap_account)
		return;

	memcg = swap_cgroup_record(ent, NULL);
	if (memcg) {
		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
		mem_cgroup_put(memcg);
	}
}
#endif
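/*
 * Illustrative sketch, not part of memcontrol.c: the hand-off of the memsw
 * charge when a page goes out to swap.  swapout_account_sketch() stands in
 * for __delete_from_swap_cache() and swapfree_account_sketch() for
 * swap_entry_free(); both names are hypothetical and the real callers take
 * locks and arguments omitted here.
 */
static void swapout_account_sketch(struct page *page, swp_entry_t ent)
{
	/*
	 * Drops the "mem" charge but keeps "memsw" charged, and records the
	 * owning memcg in swap_cgroup so the memsw charge can be found later.
	 */
	mem_cgroup_uncharge_swapcache(page, ent);
}

static void swapfree_account_sketch(swp_entry_t ent)
{
	/*
	 * The swap slot is gone for good: release the memsw charge that was
	 * parked on the swap_cgroup record at swap-out time.  If the page was
	 * swapped back in first, mem_cgroup_commit_charge_swapin() has already
	 * cleared the record and uncharged memsw, so this becomes a no-op.
	 */
	mem_cgroup_uncharge_swap(ent);
}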
/*
 * Before starting migration, account PAGE_SIZE to the mem_cgroup that the
 * old page belongs to.
 */
int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
{
	struct page_cgroup *pc;
	struct mem_cgroup *mem = NULL;
	int ret = 0;

	if (mem_cgroup_disabled())
		return 0;

	pc = lookup_page_cgroup(page);
	lock_page_cgroup(pc);
	if (PageCgroupUsed(pc)) {
		mem = pc->mem_cgroup;
		css_get(&mem->css);
	}
	unlock_page_cgroup(pc);

	if (mem) {
		ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
		css_put(&mem->css);
	}
	*ptr = mem;
	return ret;
}

/* remove the redundant charge if migration failed */
void mem_cgroup_end_migration(struct mem_cgroup *mem,
		struct page *oldpage, struct page *newpage)
{
	struct page *target, *unused;
	struct page_cgroup *pc;
	enum charge_type ctype;

	if (!mem)
		return;

	/* at migration success, oldpage->mapping is NULL. */
	if (oldpage->mapping) {
		target = oldpage;
		unused = NULL;
	} else {
		target = newpage;
		unused = oldpage;
	}

	if (PageAnon(target))
		ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
	else if (page_is_file_cache(target))
		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
	else
		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;

	/* the unused page is not on the radix-tree now. */
	if (unused)
		__mem_cgroup_uncharge_common(unused, ctype);

	pc = lookup_page_cgroup(target);
	/*
	 * __mem_cgroup_commit_charge() checks the PCG_USED bit of page_cgroup.
	 * So, double-counting is effectively avoided.
	 */
	__mem_cgroup_commit_charge(mem, pc, ctype);

	/*
	 * Both oldpage and newpage are still under lock_page(), so we don't
	 * have to care about races in the radix-tree.
	 * But we have to be careful about whether this page is mapped or not.
	 *
	 * There is a case for !page_mapped(): at the start of migration,
	 * oldpage was mapped, but now it's zapped.
	 * Still, we know the *target* page is not freed/reused under us.
	 * mem_cgroup_uncharge_page() does all the necessary checks.
	 */
	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
		mem_cgroup_uncharge_page(target);
}
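/*
 * Illustrative sketch, not part of memcontrol.c: how a page-migration path
 * such as unmap_and_move() in mm/migrate.c is expected to bracket the move
 * with the two helpers above.  migrate_one_page_sketch() is a hypothetical
 * name, and the actual unmap/copy/remap work is elided.
 */
static int migrate_one_page_sketch(struct page *oldpage, struct page *newpage)
{
	struct mem_cgroup *mem = NULL;
	int rc;

	/* pre-charge the memcg the old page belongs to (may reclaim or fail) */
	rc = mem_cgroup_prepare_migration(oldpage, &mem);
	if (rc)
		return rc;

	/* ... unmap oldpage, copy its contents to newpage, remap ... */
	rc = 0;

	/*
	 * Commit the pre-charge to whichever page survived and drop the
	 * redundant charge on the other one; safe on success and on failure.
	 */
	mem_cgroup_end_migration(mem, oldpage, newpage);
	return rc;
}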
/*
 * A call to try to shrink memory usage under the specified resource
 * controller.  This is typically used when reclaiming pages for shmem, to
 * reduce the side effects of page allocation from shmem that is charged to
 * some mem_cgroup.
 */
int mem_cgroup_shrink_usage(struct page *page,
			    struct mm_struct *mm,
			    gfp_t gfp_mask)
{
	struct mem_cgroup *mem = NULL;
	int progress = 0;
	int retry = MEM_CGROUP_RECLAIM_RETRIES;

	if (mem_cgroup_disabled())
		return 0;
	if (page)
		mem = try_get_mem_cgroup_from_swapcache(page);
	if (!mem && mm)
		mem = try_get_mem_cgroup_from_mm(mm);
	if (unlikely(!mem))
		return 0;

	do {
		progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true);
		progress += mem_cgroup_check_under_limit(mem);
	} while (!progress && --retry);

	css_put(&mem->css);
	if (!retry)
		return -ENOMEM;
	return 0;
}

static DEFINE_MUTEX(set_limit_mutex);

static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
				   unsigned long long val)
{
	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
	int progress;
	u64 memswlimit;
	int ret = 0;

	while (retry_count) {
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
		/*
		 * Rather than hide all this in some function, I do it in an
		 * open-coded manner, so you can see what it really does.
		 * We have to guarantee mem->res.limit <= mem->memsw.limit.
		 */
		mutex_lock(&set_limit_mutex);
		memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
		if (memswlimit < val) {
			ret = -EINVAL;
			mutex_unlock(&set_limit_mutex);
			break;
		}
		ret = res_counter_set_limit(&memcg->res, val);
		mutex_unlock(&set_limit_mutex);

		if (!ret)
			break;

		progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
							   false);
		if (!progress)
			retry_count--;
	}

	return ret;
}

int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
				  unsigned long long val)
{
	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
	u64 memlimit, oldusage, curusage;
	int ret;

	if (!do_swap_account)
		return -EINVAL;

	while (retry_count) {
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
		/*
		 * Rather than hide all this in some function, I do it in an
		 * open-coded manner, so you can see what it really does.
		 * We have to guarantee mem->res.limit <= mem->memsw.limit.
		 */
		mutex_lock(&set_limit_mutex);
		memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
		if (memlimit > val) {
			ret = -EINVAL;
			mutex_unlock(&set_limit_mutex);
			break;
		}
		ret = res_counter_set_limit(&memcg->memsw, val);
		mutex_unlock(&set_limit_mutex);

		if (!ret)
			break;

		oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
		mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true);
		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
		if (curusage >= oldusage)
			retry_count--;
	}
	return ret;
}
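/*
 * Illustrative userspace sketch, not part of memcontrol.c: the write ordering
 * that the two resize routines above impose on the cgroup control files.
 * Since memory.limit_in_bytes must never exceed memory.memsw.limit_in_bytes,
 * growing both limits means raising memsw first, while shrinking both means
 * lowering mem first; a write in the wrong order fails with EINVAL.  The
 * mount point and the "mygroup" cgroup name are assumptions for this example.
 *
 * #include <fcntl.h>
 * #include <stdio.h>
 * #include <string.h>
 * #include <unistd.h>
 *
 * static int write_limit(const char *file, unsigned long long bytes)
 * {
 *	char path[256], buf[32];
 *	int fd, ret;
 *
 *	snprintf(path, sizeof(path),
 *		 "/sys/fs/cgroup/memory/mygroup/%s", file);
 *	snprintf(buf, sizeof(buf), "%llu\n", bytes);
 *
 *	fd = open(path, O_WRONLY);
 *	if (fd < 0)
 *		return -1;
 *	// an out-of-order write fails here with errno == EINVAL
 *	ret = write(fd, buf, strlen(buf)) < 0 ? -1 : 0;
 *	close(fd);
 *	return ret;
 * }
 *
 * int main(void)
 * {
 *	// grow both limits: raise memsw.limit first, then the memory limit
 *	write_limit("memory.memsw.limit_in_bytes", 1024ULL << 20);
 *	write_limit("memory.limit_in_bytes", 512ULL << 20);
 *	return 0;
 * }
 */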
/*
 * This routine traverses the page_cgroups in the given list and drops them
 * all.  *And* it doesn't reclaim the pages themselves, it just removes the
 * page_cgroups.
 */
static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
				int node, int zid, enum lru_list lru)
{
	struct zone *zone;
	struct mem_cgroup_per_zone *mz;
	struct page_cgroup *pc, *busy;
	unsigned long flags, loop;
	struct list_head *list;
	int ret = 0;

	zone = &NODE_DATA(node)->node_zones[zid];
	mz = mem_cgroup_zoneinfo(mem, node, zid);
	list = &mz->lists[lru];

	loop = MEM_CGROUP_ZSTAT(mz, lru);
	/* give some margin against EBUSY etc... */
	loop += 256;
	busy = NULL;
	while (loop--) {
		ret = 0;
		spin_lock_irqsave(&zone->lru_lock, flags);
		if (list_empty(list)) {
			spin_unlock_irqrestore(&zone->lru_lock, flags);
			break;
		}
		pc = list_entry(list->prev, struct page_cgroup, lru);
		if (busy == pc) {
			list_move(&pc->lru, list);
			busy = NULL;
			spin_unlock_irqrestore(&zone->lru_lock, flags);
			continue;
		}
		spin_unlock_irqrestore(&zone->lru_lock, flags);

		ret = mem_cgroup_move_parent(pc, mem, GFP_KERNEL);
		if (ret == -ENOMEM)
			break;

		if (ret == -EBUSY || ret == -EINVAL) {
			/* found lock contention or "pc" is obsolete. */
			busy = pc;
			cond_resched();
		} else
			busy = NULL;
	}

	if (!ret && !list_empty(list))
		return -EBUSY;
	return ret;
}

/*
 * make the mem_cgroup's charge 0 if there are no tasks.
 * This enables deleting this mem_cgroup.
 */
static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
{
	int ret;
	int node, zid, shrink;
	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
	struct cgroup *cgrp = mem->css.cgroup;

	css_get(&mem->css);

	shrink = 0;
	/* should free all ? */
	if (free_all)
		goto try_to_free;
move_account:
	while (mem->res.usage > 0) {
		ret = -EBUSY;
		if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
			goto out;
		ret = -EINTR;
		if (signal_pending(current))
			goto out;
		/* This is for making all *used* pages to be on LRU. */
		lru_add_drain_all();
		ret = 0;
		for_each_node_state(node, N_HIGH_MEMORY) {
			for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
				enum lru_list l;
				for_each_lru(l) {
					ret = mem_cgroup_force_empty_list(mem,
							node, zid, l);
					if (ret)
						break;
				}
			}
			if (ret)
				break;
		}
		/* the parent cgroup doesn't seem to have enough memory */
		if (ret == -ENOMEM)
			goto try_to_free;
		cond_resched();
	}
	ret = 0;
out:
	css_put(&mem->css);
	return ret;

try_to_free:
	/* returns EBUSY if there is a task or if we come here twice. */
	if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) {
		ret = -EBUSY;
		goto out;
	}
	/* we call try-to-free pages to make this cgroup empty */
	lru_add_drain_all();
	/* try to free all pages in this cgroup */
	shrink = 1;
	while (nr_retries && mem->res.usage > 0) {
		int progress;

		if (signal_pending(current)) {
			ret = -EINTR;
			goto out;
		}
		progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
						false, get_swappiness(mem));
		if (!progress) {
			nr_retries--;
			/* maybe some writeback is necessary */
			congestion_wait(WRITE, HZ/10);
		}
	}
	lru_add_drain();
	/* try move_account... there may be some *locked* pages. */
	if (mem->res.usage)
		goto move_account;
	ret = 0;
	goto out;
}

int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
{
	return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
}

static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft)
{
	return mem_cgroup_from_cont(cont)->use_hierarchy;
}

static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
					u64 val)
{