slub.c
/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks and only
 * uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>

/*
 * Lock order:
 *   1. slab_lock(page)
 *   2. slab->list_lock
 *
 * The slab_lock protects operations on the objects of a particular
 * slab and its metadata in the page struct. If the slab lock
 * has been taken then no allocations nor frees can be performed
 * on the objects in the slab nor can the slab be added or removed
 * from the partial or full lists since this would mean modifying
 * the page struct of the slab.
 *
 * The list_lock protects the partial and full list on each node and
 * the partial slab counter. If taken then no new slabs may be added or
 * removed from the lists, nor may the number of partial slabs be modified.
 * (Note that the total number of slabs is an atomic value that may be
 * modified without taking the list lock.)
 *
 * The list_lock is a centralized lock and thus we avoid taking it as
 * much as possible. As long as SLUB does not have to handle partial
 * slabs, operations can continue without any centralized lock. F.e.
 * allocating a long series of objects that fill up slabs does not require
 * the list lock.
 *
 * The lock order is sometimes inverted when we are trying to get a slab
 * off a list. We take the list_lock and then look for a page on the list
 * to use. While we do that objects in the slabs may be freed. We can
 * only operate on the slab if we have also taken the slab_lock. So we use
 * a slab_trylock() on the slab. If trylock was successful then no frees
 * can occur anymore and we can use the slab for allocations etc. If the
 * slab_trylock() does not succeed then frees are in progress in the slab and
 * we must stay away from it for a while since we may cause a bouncing
 * cacheline if we try to acquire the lock. So go onto the next slab.
 * If all pages are busy then we may allocate a new slab instead of reusing
 * a partial slab. A new slab has no one operating on it and thus there is
 * no danger of cacheline contention.
 *
 * Interrupts are disabled during allocation and deallocation in order to
 * make the slab allocator safe to use in the context of an irq. In addition
 * interrupts are disabled to ensure that the processor does not change
 * while handling per_cpu slabs, due to kernel preemption.
 *
 * SLUB assigns one slab for allocation to each processor.
 * Allocations only occur from these slabs called cpu slabs.
 *
 * Slabs with free elements are kept on a partial list and during regular
 * operations no list for full slabs is used. If an object in a full slab is
 * freed then the slab will show up again on the partial lists.
 * We track full slabs for debugging purposes though because otherwise we
 * cannot scan all objects.
 *
 * Slabs are freed when they become empty. Teardown and setup is
 * minimal so we rely on the page allocator's per cpu caches for
 * fast frees and allocs.
 *
 * Overloading of page flags that are otherwise used for LRU management:
 *
 * PageActive		The slab is frozen and exempt from list processing.
 * 			This means that the slab is dedicated to a purpose
 * 			such as satisfying allocations for a specific
 * 			processor. Objects may be freed in the slab while
 * 			it is frozen but slab_free will then skip the usual
 * 			list operations. It is up to the processor holding
 * 			the slab to integrate the slab into the slab lists
 * 			when the slab is no longer needed.
 *
 * 			One use of this flag is to mark slabs that are
 * 			used for allocations. Then such a slab becomes a cpu
 * 			slab. The cpu slab may be equipped with an additional
 * 			freelist that allows lockless access to
 * 			free objects in addition to the regular freelist
 * 			that requires the slab lock.
 *
 * PageError		Slab requires special handling due to debug
 * 			options set. This moves slab handling out of
 * 			the fast path and disables lockless freelists.
 */
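
/*
 * Editor's note (illustrative sketch, not part of the original mm/slub.c):
 * the inverted lock order described above roughly amounts to the pattern
 * below when pulling a slab off a node's partial list. The field names
 * (n->list_lock, n->partial, n->nr_partial, page->lru) are assumed from
 * other parts of SLUB and are not defined in this excerpt.
 *
 *	spin_lock(&n->list_lock);
 *	list_for_each_entry(page, &n->partial, lru) {
 *		if (slab_trylock(page)) {
 *			list_del(&page->lru);	(no frees can happen now)
 *			n->nr_partial--;
 *			break;
 *		}
 *		(trylock failed: frees are in progress, skip this slab)
 *	}
 *	spin_unlock(&n->list_lock);
 */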

#ifdef CONFIG_SLUB_DEBUG
#define SLABDEBUG 1
#else
#define SLABDEBUG 0
#endif

/*
 * Issues still to be resolved:
 *
 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
 *
 * - Variable sizing of the per node arrays
 */

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * Set of flags that will prevent slab merging
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU)

#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA)

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	65535 /* since page.objects is u16 */

/* Internal SLUB flags */
#define __OBJECT_POISON		0x80000000 /* Poison object */
#define __SYSFS_ADD_DEFERRED	0x40000000 /* Not yet visible via sysfs */

static int kmem_size = sizeof(struct kmem_cache);

#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif

static enum {
	DOWN,		/* No slab functionality available */
	PARTIAL,	/* kmem_cache_open() works but kmalloc does not */
	UP,		/* Everything works but does not show up in sysfs */
	SYSFS		/* Sysfs up */
} slab_state = DOWN;

/* A list of all slab caches on the system */
static DECLARE_RWSEM(slub_lock);
static LIST_HEAD(slab_caches);

/*
 * Tracking user of a slab.
 */
struct track {
	unsigned long addr;	/* Called from address */
	int cpu;		/* Was running on cpu */
	int pid;		/* Pid context */
	unsigned long when;	/* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

#ifdef CONFIG_SLUB_DEBUG
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);

#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s)
{
	kfree(s);
}

#endif

static inline void stat(struct kmem_cache_cpu *c, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	c->stat[si]++;
#endif
}

/********************************************************************
 * 			Core slab cache functions
 *******************************************************************/

int slab_is_available(void)
{
	return slab_state >= UP;
}

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
#ifdef CONFIG_NUMA
	return s->node[node];
#else
	return &s->local_node;
#endif
}

static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
{
#ifdef CONFIG_SMP
	return s->cpu_slab[cpu];
#else
	return &s->cpu_slab;
#endif
}

/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
				struct page *page, const void *object)
{
	void *base;

	if (!object)
		return 1;

	base = page_address(page);
	if (object < base || object >= base + page->objects * s->size ||
		(object - base) % s->size) {
		return 0;
	}

	return 1;
}

/*
 * Slow version of get and set free pointer.
 *
 * This version requires touching the cache lines of kmem_cache, which
 * we avoid doing in the fast alloc/free paths. There we obtain the offset
 * from the page struct.
 */
static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	return *(void **)(object + s->offset);
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}

/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
			__p += (__s)->size)

/* Scan freelist */
#define for_each_free_object(__p, __s, __free) \
	for (__p = (__free); __p; __p = get_freepointer((__s), __p))

/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
	return (p - addr) / s->size;
}

static inline struct kmem_cache_order_objects oo_make(int order,
						unsigned long size)
{
	struct kmem_cache_order_objects x = {
		(order << OO_SHIFT) + (PAGE_SIZE << order) / size
	};

	return x;
}

static inline int oo_order(struct kmem_cache_order_objects x)
{
	return x.x >> OO_SHIFT;
}

static inline int oo_objects(struct kmem_cache_order_objects x)
{
	return x.x & OO_MASK;
}
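
/*
 * Editor's note (worked example, not part of the original mm/slub.c):
 * oo_make() packs the slab page order into the bits above OO_SHIFT and
 * the number of objects per slab into the low OO_MASK bits. Assuming
 * 4KiB pages, a cache of 256-byte objects using order-1 slabs gets
 *
 *	objects	= (PAGE_SIZE << 1) / 256 = 8192 / 256 = 32
 *	x.x	= (1 << OO_SHIFT) + 32 = 0x00010020
 *
 * so oo_order(x) == 1 and oo_objects(x) == 32.
 */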

#ifdef CONFIG_SLUB_DEBUG
/*
 * Debug settings:
 */
#ifdef CONFIG_SLUB_DEBUG_ON
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#else
static int slub_debug;
#endif

static char *slub_debug_slabs;

/*
 * Object debugging
 */
static void print_section(char *text, u8 *addr, unsigned int length)
{
	int i, offset;
	int newline = 1;
	char ascii[17];

	ascii[16] = 0;

	for (i = 0; i < length; i++) {
		if (newline) {
			printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
			newline = 0;
		}
		printk(KERN_CONT " %02x", addr[i]);
		offset = i % 16;
		ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
		if (offset == 15) {
			printk(KERN_CONT " %s\n", ascii);
			newline = 1;
		}
	}
	if (!newline) {
		i %= 16;
		while (i < 16) {
			printk(KERN_CONT "   ");
			ascii[i] = ' ';
			i++;
		}
		printk(KERN_CONT " %s\n", ascii);
	}
}

static struct track *get_track(struct kmem_cache *s, void *object,
	enum track_item alloc)
{
	struct track *p;

	if (s->offset)
		p = object + s->offset + sizeof(void *);
	else
		p = object + s->inuse;

	return p + alloc;
}

static void set_track(struct kmem_cache *s, void *object,
			enum track_item alloc, unsigned long addr)
{
	struct track *p;

	if (s->offset)
		p = object + s->offset + sizeof(void *);
	else
		p = object + s->inuse;

	p += alloc;
	if (addr) {
		p->addr = addr;
		p->cpu = smp_processor_id();
		p->pid = current->pid;
		p->when = jiffies;
	} else
		memset(p, 0, sizeof(struct track));
}

static void init_tracking(struct kmem_cache *s, void *object)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	set_track(s, object, TRACK_FREE, 0UL);
	set_track(s, object, TRACK_ALLOC, 0UL);
}

static void print_track(const char *s, struct track *t)
{
	if (!t->addr)
		return;

	printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
		s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
}

static void print_tracking(struct kmem_cache *s, void *object)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	print_track("Allocated", get_track(s, object, TRACK_ALLOC));
	print_track("Freed", get_track(s, object, TRACK_FREE));
}

static void print_page_info(struct page *page)
{
	printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
		page, page->objects, page->inuse, page->freelist, page->flags);
}

static void slab_bug(struct kmem_cache *s, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_ERR "========================================"
			"=====================================\n");
	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
	printk(KERN_ERR "----------------------------------------"
			"-------------------------------------\n\n");
}

static void slab_fix(struct kmem_cache *s, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
}

static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
	unsigned int off;	/* Offset of last byte */
	u8 *addr = page_address(page);

	print_tracking(s, p);

	print_page_info(page);

	printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
			p, p - addr, get_freepointer(s, p));

	if (p > addr + 16)
		print_section("Bytes b4", p - 16, 16);

	print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE));

	if (s->flags & SLAB_RED_ZONE)
		print_section("Redzone", p + s->objsize,
			s->inuse - s->objsize);

	if (s->offset)
		off = s->offset + sizeof(void *);
	else
		off = s->inuse;

	if (s->flags & SLAB_STORE_USER)
		off += 2 * sizeof(struct track);

	if (off != s->size)
		/* Beginning of the filler is the free pointer */
		print_section("Padding", p + off, s->size - off);

	dump_stack();
}

static void object_err(struct kmem_cache *s, struct page *page,
			u8 *object, char *reason)
{
	slab_bug(s, "%s", reason);
	print_trailer(s, page, object);
}

static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	slab_bug(s, "%s", buf);
	print_page_info(page);
	dump_stack();
}
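
/*
 * Editor's note (summary, not part of the original mm/slub.c): the layout
 * implied by get_track(), print_trailer() and init_object() below is,
 * per object, roughly:
 *
 *	[0, objsize)		payload, poisoned when __OBJECT_POISON is set
 *	[objsize, inuse)	red zone, when SLAB_RED_ZONE is set
 *	s->offset		free pointer, when it is stored outside the
 *				payload (s->offset != 0)
 *	next			two struct track records (TRACK_ALLOC and
 *				TRACK_FREE), when SLAB_STORE_USER is set
 *	up to s->size		remaining bytes are padding/filler
 */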

static void init_object(struct kmem_cache *s, void *object, int active)
{
	u8 *p = object;

	if (s->flags & __OBJECT_POISON) {
		memset(p, POISON_FREE, s->objsize - 1);
		p[s->objsize - 1] = POISON_END;
	}

	if (s->flags & SLAB_RED_ZONE)
		memset(p + s->objsize,
			active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE,
			s->inuse - s->objsize);
}
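
The excerpt shown on this page ends with init_object(). As a rough illustration of the freelist idea behind get_freepointer() and set_freepointer() (each free object stores the address of the next free object inside itself, at s->offset), here is a small self-contained userspace sketch. The fake_cache structure, its field values and every fake_* name below are assumptions made for the example only; this is not kernel code and not how struct kmem_cache is actually laid out.

/*
 * Minimal userspace model of an in-object freelist, in the spirit of
 * get_freepointer()/set_freepointer() above. Illustrative only.
 */
#include <stdio.h>

struct fake_cache {
	size_t size;		/* total size of one object */
	size_t offset;		/* where a free object stores the next pointer */
	void *freelist;		/* first free object, or NULL */
};

/* Read the next-free pointer stored inside a free object (cf. get_freepointer). */
static void *get_fp(struct fake_cache *s, void *object)
{
	return *(void **)((char *)object + s->offset);
}

/* Store the next-free pointer inside a free object (cf. set_freepointer). */
static void set_fp(struct fake_cache *s, void *object, void *fp)
{
	*(void **)((char *)object + s->offset) = fp;
}

static void *fake_alloc(struct fake_cache *s)
{
	void *object = s->freelist;

	if (object)
		s->freelist = get_fp(s, object);	/* pop the head of the freelist */
	return object;
}

static void fake_free(struct fake_cache *s, void *object)
{
	set_fp(s, object, s->freelist);			/* push onto the freelist */
	s->freelist = object;
}

int main(void)
{
	/* offset 0: the pointer overlays the start of a free object's payload. */
	struct fake_cache s = { .size = 64, .offset = 0, .freelist = NULL };

	/* Backing store for a pretend slab of 4 objects, aligned for pointers. */
	static void *backing[(4 * 64) / sizeof(void *)];
	char *slab = (char *)backing;

	/* Chain all objects together to build the initial freelist. */
	for (int i = 3; i >= 0; i--)
		fake_free(&s, slab + i * s.size);

	void *a = fake_alloc(&s);
	void *b = fake_alloc(&s);
	printf("allocated %p then %p\n", a, b);

	fake_free(&s, a);
	printf("next allocation reuses %p\n", fake_alloc(&s));
	return 0;
}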