/*
 * linux/mm/slab.c
 * Written by Mark Hemment, 1996/97.
 * (markhe@nextd.demon.co.uk)
 *
 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
 *
 * Major cleanup, different bufctl logic, per-cpu arrays
 *	(c) 2000 Manfred Spraul
 *
 * Cleanup, make the head arrays unconditional, preparation for NUMA
 *	(c) 2002 Manfred Spraul
 *
 * An implementation of the Slab Allocator as described in outline in;
 *	UNIX Internals: The New Frontiers by Uresh Vahalia
 *	Pub: Prentice Hall	ISBN 0-13-101908-2
 * or with a little more detail in;
 *	The Slab Allocator: An Object-Caching Kernel Memory Allocator
 *	Jeff Bonwick (Sun Microsystems).
 *	Presented at: USENIX Summer 1994 Technical Conference
 *
 * The memory is organized in caches, one cache for each object type.
 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
 * Each cache consists of many slabs (they are small (usually one
 * page long) and always contiguous), and each slab contains multiple
 * initialized objects.
 *
 * This means that your constructor is used only for newly allocated
 * slabs and you must pass objects with the same initializations to
 * kmem_cache_free.
 *
 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
 * normal). If you need a special memory type, then you must create a new
 * cache for that memory type.
 *
 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
 *   full slabs with 0 free objects
 *   partial slabs
 *   empty slabs with no allocated objects
 *
 * If partial slabs exist, then new allocations come from these slabs,
 * otherwise they come from empty slabs, or new slabs are allocated.
 *
 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
 *
 * Each cache has a short per-cpu head array; most allocs
 * and frees go into that array, and if that array overflows, then 1/2
 * of the entries in the array are given back into the global cache.
 * The head array is strictly LIFO and should improve the cache hit rates.
 * On SMP, it additionally reduces the spinlock operations.
 *
 * The c_cpuarray may not be read with enabled local interrupts -
 * it's changed with a smp_call_function().
 *
 * SMP synchronization:
 *  constructors and destructors are called without any locking.
 *  Several members in struct kmem_cache and struct slab never change, they
 *	are accessed without any locking.
 *  The per-cpu arrays are never accessed from the wrong cpu, no locking,
 *	and local interrupts are disabled so slab code is preempt-safe.
 *  The non-constant members are protected with a per-cache irq spinlock.
 *
 * Many thanks to Mark Hemment, who wrote another per-cpu slab patch
 * in 2000 - many ideas in the current implementation are derived from
 * his patch.
 *
 * Further notes from the original documentation:
 *
 * 11 April '97.  Started multi-threading - markhe
 *	The global cache-chain is protected by the mutex 'cache_chain_mutex'.
 *	The mutex is only needed when accessing/extending the cache-chain,
 *	which can never happen inside an interrupt (kmem_cache_create(),
 *	kmem_cache_shrink() and kmem_cache_reap()).
 *
 *	At present, each engine can be growing a cache.  This should be blocked.
 *
 * 15 March 2005. NUMA slab allocator.
 *	Shai Fultheim <shai@scalex86.org>.
 *	Shobhit Dayal <shobhit@calsoftinc.com>
 *	Alok N Kataria <alokk@calsoftinc.com>
 *	Christoph Lameter <christoph@lameter.com>
 *
 *	Modified the slab allocator to be node aware on NUMA systems.
 *	Each node has its own list of partial, free and full slabs.
 *	All object allocations for a node occur from node specific slab lists.
 */
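/*
 * Usage sketch (illustrative only; "struct foo", "foo_cache" and "foo_ctor"
 * are made-up names).  It shows the contract described above: the
 * constructor runs once per object when a new slab is populated, so every
 * object must be handed back to kmem_cache_free() in its constructed state.
 *
 *	static struct kmem_cache *foo_cache;
 *
 *	static void foo_ctor(void *obj)
 *	{
 *		struct foo *f = obj;
 *		spin_lock_init(&f->lock);
 *		INIT_LIST_HEAD(&f->list);
 *	}
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				      SLAB_HWCACHE_ALIGN, foo_ctor);
 *	f = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cache, f);		(f still in constructed state)
 *	kmem_cache_destroy(foo_cache);		(no concurrent allocs allowed)
 */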
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/cpuset.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/nodemask.h>
#include <linux/mempolicy.h>
#include <linux/mutex.h>
#include <linux/fault-inject.h>
#include <linux/rtmutex.h>
#include <linux/reciprocal_div.h>
#include <linux/debugobjects.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>

/*
 * DEBUG	- 1 for kmem_cache_create() to honour SLAB_RED_ZONE & SLAB_POISON.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * STATS	- 1 to collect stats for /proc/slabinfo.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
 */

#ifdef CONFIG_DEBUG_SLAB
#define	DEBUG		1
#define	STATS		1
#define	FORCED_DEBUG	1
#else
#define	DEBUG		0
#define	STATS		0
#define	FORCED_DEBUG	0
#endif

/* Shouldn't this be in a header file somewhere? */
#define	BYTES_PER_WORD		sizeof(void *)
#define	REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))

#ifndef ARCH_KMALLOC_MINALIGN
/*
 * Enforce a minimum alignment for the kmalloc caches.
 * Usually, the kmalloc caches are cache_line_size() aligned, except when
 * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
 * alignment larger than the alignment of a 64-bit integer.
 * ARCH_KMALLOC_MINALIGN allows that.
 * Note that increasing this value may disable some debug features.
 */
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
/*
 * Enforce a minimum alignment for all caches.
 * Intended for archs that get misalignment faults even for BYTES_PER_WORD
 * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
 * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
 * some debug features.
 */
#define ARCH_SLAB_MINALIGN 0
#endif

#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif

/* Legal flag mask for kmem_cache_create(). */
#if DEBUG
# define CREATE_MASK	(SLAB_RED_ZONE | \
			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_STORE_USER | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS)
#else
# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS)
#endif

/*
 * kmem_bufctl_t:
 *
 * Bufctls are used for linking objs within a slab into a list of
 * linked offsets.
 *
 * This implementation relies on "struct page" for locating the cache &
 * slab an object belongs to.
 * This allows the bufctl structure to be small (one int), but limits
 * the number of objects a slab (not a cache) can contain when off-slab
 * bufctls are used.  The limit is the size of the largest general cache
 * that does not use off-slab slabs.
 * For 32bit archs with 4 kB pages, this is 56.
 * This is not serious, as it is only for large objects, when it is unwise
 * to have too many per slab.
 * Note: This limit can be raised by introducing a general cache whose size
 * is less than 512 (PAGE_SIZE<<3), but greater than 256.
 */
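/*
 * A simplified model of the bufctl scheme described above: the free objects
 * of a slab form a singly linked list of object indices.  The index array
 * lives right behind struct slab, and slabp->free holds the head of the
 * chain (the real helpers further down in slab.c are slightly more involved):
 *
 *	kmem_bufctl_t *bufctl = (kmem_bufctl_t *)(slabp + 1);
 *
 *	alloc:	objnr = slabp->free;
 *		slabp->free = bufctl[objnr];
 *		objp = slabp->s_mem + objnr * cachep->buffer_size;
 *
 *	free:	objnr = (objp - slabp->s_mem) / cachep->buffer_size;
 *		bufctl[objnr] = slabp->free;
 *		slabp->free = objnr;
 *
 * BUFCTL_END terminates the chain; the real code avoids the division by
 * using reciprocal_divide() with the precomputed reciprocal_buffer_size.
 */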
typedef unsigned int kmem_bufctl_t;
#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
#define	BUFCTL_ACTIVE	(((kmem_bufctl_t)(~0U))-2)
#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-3)

/*
 * struct slab
 *
 * Manages the objs in a slab. Placed either at the beginning of mem allocated
 * for a slab, or allocated from a general cache.
 * Slabs are chained into three lists: fully used, partial, fully free slabs.
 */
struct slab {
	struct list_head list;
	unsigned long colouroff;
	void *s_mem;		/* including colour offset */
	unsigned int inuse;	/* num of objs active in slab */
	kmem_bufctl_t free;
	unsigned short nodeid;
};

/*
 * struct slab_rcu
 *
 * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
 * arrange for kmem_freepages to be called via RCU.  This is useful if
 * we need to approach a kernel structure obliquely, from its address
 * obtained without the usual locking.  We can lock the structure to
 * stabilize it and check it's still at the given address, only if we
 * can be sure that the memory has not been meanwhile reused for some
 * other kind of object (which our subsystem's lock might corrupt).
 *
 * rcu_read_lock before reading the address, then rcu_read_unlock after
 * taking the spinlock within the structure expected at that address.
 *
 * We assume struct slab_rcu can overlay struct slab when destroying.
 */
struct slab_rcu {
	struct rcu_head head;
	struct kmem_cache *cachep;
	void *addr;
};

/*
 * struct array_cache
 *
 * Purpose:
 * - LIFO ordering, to hand out cache-warm objects from _alloc
 * - reduce the number of linked list operations
 * - reduce spinlock operations
 *
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 */
struct array_cache {
	unsigned int avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int touched;
	spinlock_t lock;
	void *entry[];	/*
			 * Must have this definition in here for the proper
			 * alignment of array_cache. Also simplifies accessing
			 * the entries.
			 */
};

/*
 * bootstrap: The caches do not work without cpuarrays anymore, but the
 * cpuarrays are allocated from the generic caches...
 */
#define BOOT_CPUCACHE_ENTRIES	1
struct arraycache_init {
	struct array_cache cache;
	void *entries[BOOT_CPUCACHE_ENTRIES];
};

/*
 * The slab lists for all objects.
 */
struct kmem_list3 {
	struct list_head slabs_partial;	/* partial list first, better asm code */
	struct list_head slabs_full;
	struct list_head slabs_free;
	unsigned long free_objects;
	unsigned int free_limit;
	unsigned int colour_next;	/* Per-node cache coloring */
	spinlock_t list_lock;
	struct array_cache *shared;	/* shared per node */
	struct array_cache **alien;	/* on other nodes */
	unsigned long next_reap;	/* updated without locking */
	int free_touched;		/* updated without locking */
};

/*
 * Need this for bootstrapping a per node allocator.
 */
#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define	CACHE_CACHE 0
#define	SIZE_AC MAX_NUMNODES
#define	SIZE_L3 (2 * MAX_NUMNODES)

static int drain_freelist(struct kmem_cache *cache,
			struct kmem_list3 *l3, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
			int node);
static int enable_cpucache(struct kmem_cache *cachep);
static void cache_reap(struct work_struct *unused);

/*
 * This function must be completely optimized away if a constant is passed to
 * it.  Mostly the same as what is in linux/slab.h except it returns an index.
 */
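/*
 * Worked example of the CACHE() trick used below: <linux/kmalloc_sizes.h> is
 * just a list of CACHE(<size>) entries, so with the local CACHE() definition
 * the #include expands roughly to (the actual sizes depend on PAGE_SIZE and
 * the configuration):
 *
 *	if (size <= 32)  return i; else i++;
 *	if (size <= 64)  return i; else i++;
 *	if (size <= 128) return i; else i++;
 *	...
 *
 * For a compile-time constant size the cascade folds into a single constant
 * index.  Otherwise the call to __bad_size() survives and, since that
 * function is never defined, the build fails at link time.
 */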
static __always_inline int index_of(const size_t size)
{
	extern void __bad_size(void);

	if (__builtin_constant_p(size)) {
		int i = 0;

#define CACHE(x) \
	if (size <= x) \
		return i; \
	else \
		i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
		__bad_size();
	} else
		__bad_size();
	return 0;
}

static int slab_early_init = 1;

#define INDEX_AC index_of(sizeof(struct arraycache_init))
#define INDEX_L3 index_of(sizeof(struct kmem_list3))

static void kmem_list3_init(struct kmem_list3 *parent)
{
	INIT_LIST_HEAD(&parent->slabs_full);
	INIT_LIST_HEAD(&parent->slabs_partial);
	INIT_LIST_HEAD(&parent->slabs_free);
	parent->shared = NULL;
	parent->alien = NULL;
	parent->colour_next = 0;
	spin_lock_init(&parent->list_lock);
	parent->free_objects = 0;
	parent->free_touched = 0;
}

#define MAKE_LIST(cachep, listp, slab, nodeid)				\
	do {								\
		INIT_LIST_HEAD(listp);					\
		list_splice(&(cachep->nodelists[nodeid]->slab), listp);	\
	} while (0)

#define	MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
	do {								\
	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
	} while (0)

/*
 * struct kmem_cache
 *
 * manages a cache.
 */

struct kmem_cache {
/* 1) per-cpu data, touched during every alloc/free */
	struct array_cache *array[NR_CPUS];
/* 2) Cache tunables. Protected by cache_chain_mutex */
	unsigned int batchcount;
	unsigned int limit;
	unsigned int shared;

	unsigned int buffer_size;
	u32 reciprocal_buffer_size;
/* 3) touched by every alloc & free from the backend */

	unsigned int flags;		/* constant flags */
	unsigned int num;		/* # of objs per slab */

/* 4) cache_grow/shrink */
	/* order of pgs per slab (2^n) */
	unsigned int gfporder;

	/* force GFP flags, e.g. GFP_DMA */
	gfp_t gfpflags;

	size_t colour;			/* cache colouring range */
	unsigned int colour_off;	/* colour offset */
	struct kmem_cache *slabp_cache;
	unsigned int slab_size;
	unsigned int dflags;		/* dynamic flags */

	/* constructor func */
	void (*ctor)(void *obj);

/* 5) cache creation/removal */
	const char *name;
	struct list_head next;

/* 6) statistics */
#if STATS
	unsigned long num_active;
	unsigned long num_allocations;
	unsigned long high_mark;
	unsigned long grown;
	unsigned long reaped;
	unsigned long errors;
	unsigned long max_freeable;
	unsigned long node_allocs;
	unsigned long node_frees;
	unsigned long node_overflow;
	atomic_t allochit;
	atomic_t allocmiss;
	atomic_t freehit;
	atomic_t freemiss;
#endif
#if DEBUG
	/*
	 * If debugging is enabled, then the allocator can add additional
	 * fields and/or padding to every object.  buffer_size contains the
	 * total object size including these internal fields; the following
	 * two variables contain the offset to the user object and its size.
	 */
	int obj_offset;
	int obj_size;
#endif
	/*
	 * We put nodelists[] at the end of kmem_cache, because we want to size
	 * this array to nr_node_ids slots instead of MAX_NUMNODES
	 * (see kmem_cache_init()).
	 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
	 * is statically defined, so we reserve the max number of nodes.
	 */
	struct kmem_list3 *nodelists[MAX_NUMNODES];
	/*
	 * Do not add fields after nodelists[]
	 */
};

#define CFLGS_OFF_SLAB		(0x80000000UL)
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)

#define BATCHREFILL_LIMIT	16
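/*
 * Rough sketch of how the per-cpu array and the tunables above interact on
 * the fast paths (the real alloc/free paths come later in slab.c):
 *
 *	ac = cachep->array[smp_processor_id()];
 *
 *	alloc:	if (ac->avail)
 *			objp = ac->entry[--ac->avail];	LIFO, cache-warm
 *		else
 *			refill up to "batchcount" objects from the node lists
 *
 *	free:	if (ac->avail < ac->limit)
 *			ac->entry[ac->avail++] = objp;
 *		else
 *			flush "batchcount" entries back to the node lists,
 *			then store objp
 *
 * Both paths run with local interrupts disabled, so the per-cpu array itself
 * needs no locking.
 */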
/*
 * Optimization question: fewer reaps mean a lower probability of unnecessary
 * cpucache drain/refill cycles.
 *
 * OTOH the cpuarrays can contain lots of objects,
 * which could lock up otherwise freeable slabs.
 */
#define REAPTIMEOUT_CPUC	(2*HZ)
#define REAPTIMEOUT_LIST3	(4*HZ)

#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
#define	STATS_SET_HIGH(x)						\
	do {								\
		if ((x)->num_active > (x)->high_mark)			\