slub.c
/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks and only
 * uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>

/*
 * Lock order:
 *   1. slab_lock(page)
 *   2. slab->list_lock
 *
 * The slab_lock protects operations on the objects of a particular
 * slab and its metadata in the page struct. If the slab lock
 * has been taken then no allocations nor frees can be performed
 * on the objects in the slab nor can the slab be added or removed
 * from the partial or full lists since this would mean modifying
 * the page struct of the slab.
 *
 * The list_lock protects the partial and full list on each node and
 * the partial slab counter. If taken then no new slabs may be added or
 * removed from the lists, nor may the number of partial slabs be modified.
 * (Note that the total number of slabs is an atomic value that may be
 * modified without taking the list lock.)
 *
 * The list_lock is a centralized lock and thus we avoid taking it as
 * much as possible. As long as SLUB does not have to handle partial
 * slabs, operations can continue without any centralized lock. F.e.
 * allocating a long series of objects that fill up slabs does not require
 * the list lock.
 *
 * The lock order is sometimes inverted when we are trying to get a slab
 * off a list. We take the list_lock and then look for a page on the list
 * to use. While we do that objects in the slabs may be freed. We can
 * only operate on the slab if we have also taken the slab_lock. So we use
 * a slab_trylock() on the slab. If trylock was successful then no frees
 * can occur anymore and we can use the slab for allocations etc. If the
 * slab_trylock() does not succeed then frees are in progress in the slab and
 * we must stay away from it for a while since we may cause a bouncing
 * cacheline if we try to acquire the lock. So go onto the next slab.
 * If all pages are busy then we may allocate a new slab instead of reusing
 * a partial slab. A new slab has no one operating on it and thus there is
 * no danger of cacheline contention.
 *
 * Interrupts are disabled during allocation and deallocation in order to
 * make the slab allocator safe to use in the context of an irq. In addition
 * interrupts are disabled to ensure that the processor does not change
 * while handling per_cpu slabs, due to kernel preemption.
 *
 * SLUB assigns one slab for allocation to each processor.
 * Allocations only occur from these slabs called cpu slabs.
 *
 * Slabs with free elements are kept on a partial list and during regular
 * operations no list for full slabs is used. If an object in a full slab is
 * freed then the slab will show up again on the partial lists.
 * We track full slabs for debugging purposes though because otherwise we
 * cannot scan all objects.
 *
 * Slabs are freed when they become empty. Teardown and setup is
 * minimal so we rely on the page allocator's per cpu caches for
 * fast frees and allocs.
 *
 * Overloading of page flags that are otherwise used for LRU management:
 *
 * PageActive		The slab is frozen and exempt from list processing.
 * 			This means that the slab is dedicated to a purpose
 * 			such as satisfying allocations for a specific
 * 			processor. Objects may be freed in the slab while
 * 			it is frozen but slab_free will then skip the usual
 * 			list operations. It is up to the processor holding
 * 			the slab to integrate the slab into the slab lists
 * 			when the slab is no longer needed.
 *
 * 			One use of this flag is to mark slabs that are
 * 			used for allocations. Then such a slab becomes a cpu
 * 			slab. The cpu slab may be equipped with an additional
 * 			freelist that allows lockless access to
 * 			free objects in addition to the regular freelist
 * 			that requires the slab lock.
 *
 * PageError		Slab requires special handling due to debug
 * 			options set. This moves slab handling out of
 * 			the fast path and disables lockless freelists.
 */
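
/*
 * Editor's note (illustrative sketch, not part of the original mm/slub.c):
 * the inverted lock order described above roughly amounts to the pattern
 * below when pulling a slab off a node's partial list. The field names
 * (n->list_lock, n->partial, n->nr_partial, page->lru) are assumed from
 * other parts of SLUB and are not defined in this excerpt.
 *
 *	spin_lock(&n->list_lock);
 *	list_for_each_entry(page, &n->partial, lru) {
 *		if (slab_trylock(page)) {
 *			list_del(&page->lru);	(no frees can happen now)
 *			n->nr_partial--;
 *			break;
 *		}
 *		(trylock failed: frees are in progress, skip this slab)
 *	}
 *	spin_unlock(&n->list_lock);
 */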

#ifdef CONFIG_SLUB_DEBUG
#define SLABDEBUG 1
#else
#define SLABDEBUG 0
#endif

/*
 * Issues still to be resolved:
 *
 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
 *
 * - Variable sizing of the per node arrays
 */

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * Set of flags that will prevent slab merging
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU)

#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA)

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	65535 /* since page.objects is u16 */

/* Internal SLUB flags */
#define __OBJECT_POISON		0x80000000 /* Poison object */
#define __SYSFS_ADD_DEFERRED	0x40000000 /* Not yet visible via sysfs */

static int kmem_size = sizeof(struct kmem_cache);

#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif

static enum {
	DOWN,		/* No slab functionality available */
	PARTIAL,	/* kmem_cache_open() works but kmalloc does not */
	UP,		/* Everything works but does not show up in sysfs */
	SYSFS		/* Sysfs up */
} slab_state = DOWN;

/* A list of all slab caches on the system */
static DECLARE_RWSEM(slub_lock);
static LIST_HEAD(slab_caches);

/*
 * Tracking user of a slab.
 */
struct track {
	unsigned long addr;	/* Called from address */
	int cpu;		/* Was running on cpu */
	int pid;		/* Pid context */
	unsigned long when;	/* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

#ifdef CONFIG_SLUB_DEBUG
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);

#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s)
{
	kfree(s);
}

#endif

static inline void stat(struct kmem_cache_cpu *c, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	c->stat[si]++;
#endif
}

/********************************************************************
 * 			Core slab cache functions
 *******************************************************************/

int slab_is_available(void)
{
	return slab_state >= UP;
}

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
#ifdef CONFIG_NUMA
	return s->node[node];
#else
	return &s->local_node;
#endif
}

static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
{
#ifdef CONFIG_SMP
	return s->cpu_slab[cpu];
#else
	return &s->cpu_slab;
#endif
}

/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
				struct page *page, const void *object)
{
	void *base;

	if (!object)
		return 1;

	base = page_address(page);
	if (object < base || object >= base + page->objects * s->size ||
		(object - base) % s->size) {
		return 0;
	}

	return 1;
}

/*
 * Slow version of get and set free pointer.
 *
 * This version requires touching the cache lines of kmem_cache, which
 * we avoid doing in the fast alloc/free paths. There we obtain the offset
 * from the page struct.
 */
static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	return *(void **)(object + s->offset);
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}

/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
			__p += (__s)->size)

/* Scan freelist */
#define for_each_free_object(__p, __s, __free) \
	for (__p = (__free); __p; __p = get_freepointer((__s), __p))

/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
	return (p - addr) / s->size;
}

static inline struct kmem_cache_order_objects oo_make(int order,
						unsigned long size)
{
	struct kmem_cache_order_objects x = {
		(order << OO_SHIFT) + (PAGE_SIZE << order) / size
	};

	return x;
}

static inline int oo_order(struct kmem_cache_order_objects x)
{
	return x.x >> OO_SHIFT;
}

static inline int oo_objects(struct kmem_cache_order_objects x)
{
	return x.x & OO_MASK;
}
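
/*
 * Editor's note (worked example, not part of the original mm/slub.c):
 * oo_make() packs the slab page order into the bits above OO_SHIFT and
 * the number of objects per slab into the low OO_MASK bits. Assuming
 * 4KiB pages, a cache of 256-byte objects using order-1 slabs gets
 *
 *	objects	= (PAGE_SIZE << 1) / 256 = 8192 / 256 = 32
 *	x.x	= (1 << OO_SHIFT) + 32 = 0x00010020
 *
 * so oo_order(x) == 1 and oo_objects(x) == 32.
 */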

#ifdef CONFIG_SLUB_DEBUG
/*
 * Debug settings:
 */
#ifdef CONFIG_SLUB_DEBUG_ON
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#else
static int slub_debug;
#endif

static char *slub_debug_slabs;

/*
 * Object debugging
 */
static void print_section(char *text, u8 *addr, unsigned int length)
{
	int i, offset;
	int newline = 1;
	char ascii[17];

	ascii[16] = 0;

	for (i = 0; i < length; i++) {
		if (newline) {
			printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
			newline = 0;
		}
		printk(KERN_CONT " %02x", addr[i]);
		offset = i % 16;
		ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
		if (offset == 15) {
			printk(KERN_CONT " %s\n", ascii);
			newline = 1;
		}
	}
	if (!newline) {
		i %= 16;
		while (i < 16) {
			printk(KERN_CONT "   ");
			ascii[i] = ' ';
			i++;
		}
		printk(KERN_CONT " %s\n", ascii);
	}
}

static struct track *get_track(struct kmem_cache *s, void *object,
	enum track_item alloc)
{
	struct track *p;

	if (s->offset)
		p = object + s->offset + sizeof(void *);
	else
		p = object + s->inuse;

	return p + alloc;
}

static void set_track(struct kmem_cache *s, void *object,
			enum track_item alloc, unsigned long addr)
{
	struct track *p;

	if (s->offset)
		p = object + s->offset + sizeof(void *);
	else
		p = object + s->inuse;

	p += alloc;
	if (addr) {
		p->addr = addr;
		p->cpu = smp_processor_id();
		p->pid = current->pid;
		p->when = jiffies;
	} else
		memset(p, 0, sizeof(struct track));
}

static void init_tracking(struct kmem_cache *s, void *object)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	set_track(s, object, TRACK_FREE, 0UL);
	set_track(s, object, TRACK_ALLOC, 0UL);
}

static void print_track(const char *s, struct track *t)
{
	if (!t->addr)
		return;

	printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
		s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
}

static void print_tracking(struct kmem_cache *s, void *object)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	print_track("Allocated", get_track(s, object, TRACK_ALLOC));
	print_track("Freed", get_track(s, object, TRACK_FREE));
}

static void print_page_info(struct page *page)
{
	printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
		page, page->objects, page->inuse, page->freelist, page->flags);
}

static void slab_bug(struct kmem_cache *s, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_ERR "========================================"
			"=====================================\n");
	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
	printk(KERN_ERR "----------------------------------------"
			"-------------------------------------\n\n");
}

static void slab_fix(struct kmem_cache *s, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
}

static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
	unsigned int off;	/* Offset of last byte */
	u8 *addr = page_address(page);

	print_tracking(s, p);

	print_page_info(page);

	printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
			p, p - addr, get_freepointer(s, p));

	if (p > addr + 16)
		print_section("Bytes b4", p - 16, 16);

	print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE));

	if (s->flags & SLAB_RED_ZONE)
		print_section("Redzone", p + s->objsize,
			s->inuse - s->objsize);

	if (s->offset)
		off = s->offset + sizeof(void *);
	else
		off = s->inuse;

	if (s->flags & SLAB_STORE_USER)
		off += 2 * sizeof(struct track);

	if (off != s->size)
		/* Beginning of the filler is the free pointer */
		print_section("Padding", p + off, s->size - off);

	dump_stack();
}

static void object_err(struct kmem_cache *s, struct page *page,
			u8 *object, char *reason)
{
	slab_bug(s, "%s", reason);
	print_trailer(s, page, object);
}

static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	slab_bug(s, "%s", buf);
	print_page_info(page);
	dump_stack();
}
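
/*
 * Editor's note (summary, not part of the original mm/slub.c): the layout
 * implied by get_track(), print_trailer() and init_object() below is,
 * per object, roughly:
 *
 *	[0, objsize)		payload, poisoned when __OBJECT_POISON is set
 *	[objsize, inuse)	red zone, when SLAB_RED_ZONE is set
 *	s->offset		free pointer, when it is stored outside the
 *				payload (s->offset != 0)
 *	next			two struct track records (TRACK_ALLOC and
 *				TRACK_FREE), when SLAB_STORE_USER is set
 *	up to s->size		remaining bytes are padding/filler
 */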

static void init_object(struct kmem_cache *s, void *object, int active)
{
	u8 *p = object;

	if (s->flags & __OBJECT_POISON) {
		memset(p, POISON_FREE, s->objsize - 1);
		p[s->objsize - 1] = POISON_END;
	}

	if (s->flags & SLAB_RED_ZONE)
		memset(p + s->objsize,
			active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE,
			s->inuse - s->objsize);
}
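
The excerpt shown on this page ends with init_object(). As a rough illustration of the freelist idea behind get_freepointer() and set_freepointer() (each free object stores the address of the next free object inside itself, at s->offset), here is a small self-contained userspace sketch. The fake_cache structure, its field values and every fake_* name below are assumptions made for the example only; this is not kernel code and not how struct kmem_cache is actually laid out.

/*
 * Minimal userspace model of an in-object freelist, in the spirit of
 * get_freepointer()/set_freepointer() above. Illustrative only.
 */
#include <stdio.h>

struct fake_cache {
	size_t size;		/* total size of one object */
	size_t offset;		/* where a free object stores the next pointer */
	void *freelist;		/* first free object, or NULL */
};

/* Read the next-free pointer stored inside a free object (cf. get_freepointer). */
static void *get_fp(struct fake_cache *s, void *object)
{
	return *(void **)((char *)object + s->offset);
}

/* Store the next-free pointer inside a free object (cf. set_freepointer). */
static void set_fp(struct fake_cache *s, void *object, void *fp)
{
	*(void **)((char *)object + s->offset) = fp;
}

static void *fake_alloc(struct fake_cache *s)
{
	void *object = s->freelist;

	if (object)
		s->freelist = get_fp(s, object);	/* pop the head of the freelist */
	return object;
}

static void fake_free(struct fake_cache *s, void *object)
{
	set_fp(s, object, s->freelist);			/* push onto the freelist */
	s->freelist = object;
}

int main(void)
{
	/* offset 0: the pointer overlays the start of a free object's payload. */
	struct fake_cache s = { .size = 64, .offset = 0, .freelist = NULL };

	/* Backing store for a pretend slab of 4 objects, aligned for pointers. */
	static void *backing[(4 * 64) / sizeof(void *)];
	char *slab = (char *)backing;

	/* Chain all objects together to build the initial freelist. */
	for (int i = 3; i >= 0; i--)
		fake_free(&s, slab + i * s.size);

	void *a = fake_alloc(&s);
	void *b = fake_alloc(&s);
	printf("allocated %p then %p\n", a, b);

	fake_free(&s, a);
	printf("next allocation reuses %p\n", fake_alloc(&s));
	return 0;
}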