📄 oom_kill.c
字号:
/*
 *  linux/mm/oom_kill.c
 *
 *  Copyright (C)  1998,2000  Rik van Riel
 *	Thanks go out to Claus Fischer for some serious inspiration and
 *	for goading me into coding this file...
 *
 *  The routines in this file are used to kill a process when
 *  we're seriously out of memory. This gets called from kswapd()
 *  in linux/mm/vmscan.c when we really run out of memory.
 *
 *  Since we won't call these routines often (on a well-configured
 *  machine) this file will double as a 'coding guide' and a signpost
 *  for newbie kernel hackers. It features several pointers to major
 *  kernel subsystems and hints as to where to find out what things do.
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/swap.h>
#include <linux/timex.h>
#ifdef CONFIG_MIZI
#include <asm/hardware.h>
#endif

/* With CONFIG_NO_OOM_KILLER defined, nothing in this file is compiled. */
#ifndef CONFIG_NO_OOM_KILLER

/* #define DEBUG */

/**
 * int_sqrt - oom_kill.c internal function, rough approximation to sqrt
 * @x: integer of which to calculate the sqrt
 *
 * A very rough approximation to the sqrt() function: the result starts
 * out as @x and is halved once for every two bits shifted out of @x.
 * Never returns 0, so the result is always safe to use as a divisor.
 */
static unsigned int int_sqrt(unsigned int x)
{
	unsigned int out = x;

	while (x & ~(unsigned int)1) {
		x >>= 2;
		out >>= 1;
	}
	/* An odd number of significant bits: trim the estimate by a quarter. */
	if (x)
		out -= out >> 2;

	return out ? out : 1;
}

/**
 * badness - calculate a numeric value for how bad this task has been
 * @p: task struct of which task we should calculate
 *
 * The formula used is relatively simple and documented inline in the
 * function. The main rationale is that we want to select a good task
 * to kill when we run out of memory.
 *
 * Good in this context means that:
 * 1) we lose the minimum amount of work done
 * 2) we recover a large amount of memory
 * 3) we don't kill anything innocent of eating tons of memory
 * 4) we want to kill the minimum amount of processes (one)
 * 5) we try to kill the process the user expects us to kill, this
 *    algorithm has been meticulously tuned to meet the principle
 *    of least surprise ... (be careful when you change it)
 *
 * Returns the number of points for @p; 0 for a task without an mm.
 */
static int badness(struct task_struct *p)
{
	int points, cpu_time, run_time;

	if (!p->mm)
		return 0;

	/*
	 * The memory size of the process is the basis for the badness.
	 */
	points = p->mm->total_vm;

	/*
	 * CPU time is in seconds and run time is in minutes. There is no
	 * particular reason for this other than that it turned out to work
	 * very well in practice. This is not safe against jiffie wraps
	 * but we don't care _that_ much...
	 */
	cpu_time = (p->times.tms_utime + p->times.tms_stime) >> (SHIFT_HZ + 3);
	run_time = (jiffies - p->start_time) >> (SHIFT_HZ + 10);

	points /= int_sqrt(cpu_time);
	points /= int_sqrt(int_sqrt(run_time));

	/*
	 * Niced processes are most likely less important, so double
	 * their badness points.
	 */
	if (p->nice > 0)
		points *= 2;

#ifdef CONFIG_MIZI
	/*
	 * We don't want to kill important linu@ processes such as server
	 * and launcher. So we need to distinguish them from the others.
	 * Tasks without CAP_MIZI_OOMKILL in their effective set get their
	 * score cut by a factor of 16.
	 */
	if (!(cap_t(p->cap_effective) & CAP_TO_MASK(CAP_MIZI_OOMKILL)))
		points /= 16;
#endif

	/*
	 * Superuser processes are usually more important, so we make it
	 * less likely that we kill those.
	 */
	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN) ||
				p->uid == 0 || p->euid == 0)
		points /= 4;

	/*
	 * We don't want to kill a process with direct hardware access.
	 * Not only could that mess up the hardware, but usually users
	 * tend to only have this flag set on applications they think
	 * of as important.
	 */
	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO))
		points /= 4;
#ifdef DEBUG
	printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n",
	p->pid, p->comm, points);
#endif
	return points;
}

/*
 * Simple selection loop. We chose the process with the highest
 * number of 'points'. We expect the caller will lock the tasklist.
 * Tasks with pid 0 are never considered. Returns NULL when no task
 * scores above zero.
 *
 * (not docbooked, we don't want this one cluttering up the manual)
 */
static struct task_struct * select_bad_process(void)
{
	int maxpoints = 0;
	struct task_struct *p = NULL;
	struct task_struct *chosen = NULL;

	for_each_task(p) {
		if (p->pid) {
			int points = badness(p);
			if (points > maxpoints) {
				chosen = p;
				maxpoints = points;
			}
		}
	}
	return chosen;
}

/**
 * oom_kill_task - log and signal one task chosen by the OOM killer
 * @p: task to terminate
 *
 * We must be careful though to never send SIGKILL a process with
 * CAP_SYS_RAWIO set, send SIGTERM instead (but it's unlikely that
 * we select a process with CAP_SYS_RAWIO set).
 */
void oom_kill_task(struct task_struct *p)
{
	printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", p->pid, p->comm);

	/*
	 * We give our sacrificial lamb high priority and access to
	 * all the memory it needs. That way it should be able to
	 * exit() and clear out its resources quickly...
	 */
	p->counter = 5 * HZ;
	p->flags |= PF_MEMALLOC | PF_MEMDIE;

	/* This process has hardware access, be more careful. */
	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
		force_sig(SIGTERM, p);
	} else {
		force_sig(SIGKILL, p);
	}
}

/**
 * oom_kill - kill the "best" process when we run out of memory
 *
 * If we run out of memory, we have the choice between either
 * killing a random task (bad), letting the system crash (worse)
 * OR try to be smart about which process to kill. Note that we
 * don't have to be perfect here, we just have to be good.
 */
static void oom_kill(void)
{
	struct task_struct *p, *q;

	/*
	 * Take the tasklist read lock exactly once: it covers both the
	 * selection pass and the kill loop, and is paired with the single
	 * read_unlock() below. Taking it a second time here would leave
	 * the read count permanently elevated and lock out every writer.
	 */
	read_lock(&tasklist_lock);
	p = select_bad_process();

	/* Found nothing?!?! Either we hang forever, or we panic. */
	if (p == NULL)
		panic("Out of memory and no killable processes...\n");

	/* kill all processes that share the ->mm (i.e. all threads) */
	for_each_task(q) {
		if (q->mm == p->mm)
			oom_kill_task(q);
	}
	read_unlock(&tasklist_lock);

	/*
	 * Make kswapd go out of the way, so "p" has a good chance of
	 * killing itself before someone else gets the chance to ask
	 * for more memory.
	 */
	current->policy |= SCHED_YIELD;
	schedule();
}

/**
 * out_of_memory - is the system out of memory?
 *
 * Decides from the recent history of calls whether we are really out
 * of memory, and calls oom_kill() if so. State is kept in function-local
 * statics: the start of the current failure streak (first), the time of
 * the previous call (last), the number of failures in the streak (count)
 * and the time of the last kill (lastkill).
 */
void out_of_memory(void)
{
	static unsigned long first, last, count, lastkill;
	unsigned long now, since;

	/*
	 * Enough swap space left?  Not OOM.
	 */
	if (nr_swap_pages > 0)
		return;

	now = jiffies;
	since = now - last;
	last = now;

	/*
	 * If it's been a long time since last failure,
	 * we're not oom.
	 */
	if (since > 5*HZ)
		goto reset;

	/*
	 * If we haven't tried for at least one second,
	 * we're not really oom.
	 */
	since = now - first;
	if (since < HZ)
		return;

	/*
	 * If we have gotten only a few failures,
	 * we're not really oom.
	 */
	if (++count < 10)
		return;

	/*
	 * If we just killed a process, wait a while
	 * to give that task a chance to exit. This
	 * avoids killing multiple processes needlessly.
	 */
	since = now - lastkill;
	if (since < HZ*5)
		return;

#ifdef CONFIG_MIZI
	event_notify(SYSTEM_NOMEM);
#endif

	/*
	 * Ok, really out of memory. Kill something.
	 */
	lastkill = now;
	oom_kill();

reset:
	first = now;
	count = 0;
}

#endif /* !CONFIG_NO_OOM_KILLER */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -