📄 slab.c
			(x)->high_mark = (x)->num_active;	\
	} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
#define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)	((x)->node_overflow++)
#define	STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < i)				\
			(x)->max_freeable = i;				\
	} while (0)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_ADD_REAPED(x,y)	do { } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#define	STATS_INC_NODEALLOCS(x)	do { } while (0)
#define	STATS_INC_NODEFREES(x)	do { } while (0)
#define STATS_INC_ACOVERFLOW(x)	do { } while (0)
#define	STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif

#if DEBUG

/*
 * memory layout of objects:
 * 0		: objp
 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
 * 		the end of an object is aligned with the end of the real
 * 		allocation. Catches writes behind the end of the allocation.
 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
 * 		redzone word.
 * cachep->obj_offset: The real object.
 * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
 * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address
 *					[BYTES_PER_WORD long]
 */
static int obj_offset(struct kmem_cache *cachep)
{
	return cachep->obj_offset;
}

static int obj_size(struct kmem_cache *cachep)
{
	return cachep->obj_size;
}

static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	return (unsigned long long*) (objp + obj_offset(cachep) -
				      sizeof(unsigned long long));
}

static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	if (cachep->flags & SLAB_STORE_USER)
		return (unsigned long long *)(objp + cachep->buffer_size -
					      sizeof(unsigned long long) -
					      REDZONE_ALIGN);
	return (unsigned long long *) (objp + cachep->buffer_size -
				       sizeof(unsigned long long));
}

static void **dbg_userword(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_STORE_USER));
	return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
}

#else

#define obj_offset(x)			0
#define obj_size(cachep)		(cachep->buffer_size)
#define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})

#endif

/*
 * Do not go above this order unless 0 objects fit into the slab.
 */
#define	BREAK_GFP_ORDER_HI	1
#define	BREAK_GFP_ORDER_LO	0
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;

/*
 * Functions for storing/retrieving the cachep and or slab from the page
 * allocator.  These are used to find the slab an obj belongs to.  With kfree(),
 * these are used to find the cache which an obj belongs to.
 */
static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
{
	page->lru.next = (struct list_head *)cache;
}

static inline struct kmem_cache *page_get_cache(struct page *page)
{
	page = compound_head(page);
	BUG_ON(!PageSlab(page));
	return (struct kmem_cache *)page->lru.next;
}

static inline void page_set_slab(struct page *page, struct slab *slab)
{
	page->lru.prev = (struct list_head *)slab;
}

static inline struct slab *page_get_slab(struct page *page)
{
	BUG_ON(!PageSlab(page));
	return (struct slab *)page->lru.prev;
}

static inline struct kmem_cache *virt_to_cache(const void *obj)
{
	struct page *page = virt_to_head_page(obj);
	return page_get_cache(page);
}

static inline struct slab *virt_to_slab(const void *obj)
{
	struct page *page = virt_to_head_page(obj);
	return page_get_slab(page);
}

static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
				 unsigned int idx)
{
	return slab->s_mem + cache->buffer_size * idx;
}

/*
 * We want to avoid an expensive divide : (offset / cache->buffer_size)
 *   Using the fact that buffer_size is a constant for a particular cache,
 *   we can replace (offset / cache->buffer_size) by
 *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
 */
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
					const struct slab *slab, void *obj)
{
	u32 offset = (obj - slab->s_mem);
	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
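/*
 * Illustration (hypothetical numbers): for a cache with buffer_size = 256,
 * an object at offset 1024 from slab->s_mem has index 1024 / 256 = 4.
 * reciprocal_divide() computes the same result from the precomputed
 * cache->reciprocal_buffer_size with a multiply and a shift instead of a
 * hardware divide, and index_to_obj() is the inverse mapping:
 * s_mem + 256 * 4 gives the object back.
 */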
/*
 * These are the default caches for kmalloc. Custom caches can have other sizes.
 */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
	CACHE(ULONG_MAX)
#undef CACHE
};
EXPORT_SYMBOL(malloc_sizes);

/* Must match cache_sizes above. Out of line to keep cache footprint low. */
struct cache_names {
	char *name;
	char *name_dma;
};

static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
	{NULL,}
#undef CACHE
};

static struct arraycache_init initarray_cache __initdata =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
static struct arraycache_init initarray_generic =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };

/* internal cache of cache description objs */
static struct kmem_cache cache_cache = {
	.batchcount = 1,
	.limit = BOOT_CPUCACHE_ENTRIES,
	.shared = 1,
	.buffer_size = sizeof(struct kmem_cache),
	.name = "kmem_cache",
};

#define BAD_ALIEN_MAGIC 0x01020304ul

#ifdef CONFIG_LOCKDEP

/*
 * Slab sometimes uses the kmalloc slabs to store the slab headers
 * for other slabs "off slab".
 * The locking for this is tricky in that it nests within the locks
 * of all other slabs in a few places; to deal with this special
 * locking we put on-slab caches into a separate lock-class.
 *
 * We set lock class for alien array caches which are up during init.
 * The lock annotation will be lost if all cpus of a node goes down and
 * then comes back up during hotplug
 */
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;

static inline void init_lock_keys(void)
{
	int q;
	struct cache_sizes *s = malloc_sizes;

	while (s->cs_size != ULONG_MAX) {
		for_each_node(q) {
			struct array_cache **alc;
			int r;
			struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
			if (!l3 || OFF_SLAB(s->cs_cachep))
				continue;
			lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
			alc = l3->alien;
			/*
			 * FIXME: This check for BAD_ALIEN_MAGIC
			 * should go away when common slab code is taught to
			 * work even without alien caches.
			 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
			 * for alloc_alien_cache,
			 */
			if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
				continue;
			for_each_node(r) {
				if (alc[r])
					lockdep_set_class(&alc[r]->lock,
					     &on_slab_alc_key);
			}
		}
		s++;
	}
}
#else
static inline void init_lock_keys(void)
{
}
#endif

/*
 * Guard access to the cache-chain.
 */
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;

/*
 * chicken and egg problem: delay the per-cpu array allocation
 * until the general caches are up.
 */
static enum {
	NONE,
	PARTIAL_AC,
	PARTIAL_L3,
	FULL
} g_cpucache_up;

/*
 * used by boot code to determine if it can use slab based allocator
 */
int slab_is_available(void)
{
	return g_cpucache_up == FULL;
}

static DEFINE_PER_CPU(struct delayed_work, reap_work);

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
	return cachep->array[smp_processor_id()];
}

static inline struct kmem_cache *__find_general_cachep(size_t size,
							gfp_t gfpflags)
{
	struct cache_sizes *csizep = malloc_sizes;

#if DEBUG
	/* This happens if someone tries to call
	 * kmem_cache_create(), or __kmalloc(), before
	 * the generic caches are initialized.
	 */
	BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
#endif
	if (!size)
		return ZERO_SIZE_PTR;

	while (size > csizep->cs_size)
		csizep++;

	/*
	 * Really subtle: The last entry with cs->cs_size==ULONG_MAX
	 * has cs_{dma,}cachep==NULL. Thus no special case
	 * for large kmalloc calls required.
	 */
#ifdef CONFIG_ZONE_DMA
	if (unlikely(gfpflags & GFP_DMA))
		return csizep->cs_dmacachep;
#endif
	return csizep->cs_cachep;
}

static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
{
	return __find_general_cachep(size, gfpflags);
}
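/*
 * Illustration (assuming a typical 4K-page configuration of
 * kmalloc_sizes.h): __find_general_cachep(100, GFP_KERNEL) walks
 * malloc_sizes[] until cs_size >= 100 and returns the "size-128"
 * general cache; the same request with GFP_DMA (and CONFIG_ZONE_DMA)
 * would pick "size-128(DMA)" instead.
 */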
static size_t slab_mgmt_size(size_t nr_objs, size_t align)
{
	return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
}

/*
 * Calculate the number of objects and left-over bytes for a given buffer size.
 */
static void cache_estimate(unsigned long gfporder, size_t buffer_size,
			   size_t align, int flags, size_t *left_over,
			   unsigned int *num)
{
	int nr_objs;
	size_t mgmt_size;
	size_t slab_size = PAGE_SIZE << gfporder;

	/*
	 * The slab management structure can be either off the slab or
	 * on it. For the latter case, the memory allocated for a
	 * slab is used for:
	 *
	 * - The struct slab
	 * - One kmem_bufctl_t for each object
	 * - Padding to respect alignment of @align
	 * - @buffer_size bytes for each object
	 *
	 * If the slab management structure is off the slab, then the
	 * alignment will already be calculated into the size. Because
	 * the slabs are all pages aligned, the objects will be at the
	 * correct alignment when allocated.
	 */
	if (flags & CFLGS_OFF_SLAB) {
		mgmt_size = 0;
		nr_objs = slab_size / buffer_size;

		if (nr_objs > SLAB_LIMIT)
			nr_objs = SLAB_LIMIT;
	} else {
		/*
		 * Ignore padding for the initial guess. The padding
		 * is at most @align-1 bytes, and @buffer_size is at
		 * least @align. In the worst case, this result will
		 * be one greater than the number of objects that fit
		 * into the memory allocation when taking the padding
		 * into account.
		 */
		nr_objs = (slab_size - sizeof(struct slab)) /
			  (buffer_size + sizeof(kmem_bufctl_t));

		/*
		 * This calculated number will be either the right
		 * amount, or one greater than what we want.
		 */
		if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
		       > slab_size)
			nr_objs--;

		if (nr_objs > SLAB_LIMIT)
			nr_objs = SLAB_LIMIT;

		mgmt_size = slab_mgmt_size(nr_objs, align);
	}
	*num = nr_objs;
	*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
}
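/*
 * Worked example (hypothetical sizes): with 4K pages, gfporder = 0,
 * buffer_size = 256, on-slab management, a 32-byte struct slab,
 * 4-byte kmem_bufctl_t and align = 8, the initial guess is
 * (4096 - 32) / (256 + 4) = 15 objects.  slab_mgmt_size(15, 8) =
 * ALIGN(32 + 15*4, 8) = 96 and 96 + 15*256 = 3936 <= 4096, so the
 * guess stands: *num = 15 and *left_over = 4096 - 3840 - 96 = 160.
 */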
#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)

static void __slab_error(const char *function, struct kmem_cache *cachep,
			char *msg)
{
	printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
	       function, cachep->name, msg);
	dump_stack();
}

/*
 * By default on NUMA we use alien caches to stage the freeing of
 * objects allocated from other nodes. This causes massive memory
 * inefficiencies when using fake NUMA setup to split memory into a
 * large number of small nodes, so it can be disabled on the command
 * line
 */
static int use_alien_caches __read_mostly = 1;
static int numa_platform __read_mostly = 1;
static int __init noaliencache_setup(char *s)
{
	use_alien_caches = 0;
	return 1;
}
__setup("noaliencache", noaliencache_setup);

#ifdef CONFIG_NUMA
/*
 * Special reaping functions for NUMA systems called from cache_reap().
 * These take care of doing round robin flushing of alien caches (containing
 * objects freed on different nodes from which they were allocated) and the
 * flushing of remote pcps by calling drain_node_pages.
 */
static DEFINE_PER_CPU(unsigned long, reap_node);

static void init_reap_node(int cpu)
{
	int node;

	node = next_node(cpu_to_node(cpu), node_online_map);
	if (node == MAX_NUMNODES)
		node = first_node(node_online_map);

	per_cpu(reap_node, cpu) = node;
}

static void next_reap_node(void)
{
	int node = __get_cpu_var(reap_node);

	node = next_node(node, node_online_map);
	if (unlikely(node >= MAX_NUMNODES))
		node = first_node(node_online_map);
	__get_cpu_var(reap_node) = node;
}

#else
#define init_reap_node(cpu) do { } while (0)
#define next_reap_node(void) do { } while (0)
#endif

/*
 * Initiate the reap timer running on the target CPU.  We run at around 1 to 2Hz
 * via the workqueue/eventd.
 * Add the CPU number into the expiration time to minimize the possibility of
 * the CPUs getting into lockstep and contending for the global cache chain
 * lock.
 */
static void __cpuinit start_cpu_timer(int cpu)
{
	struct delayed_work *reap_work = &per_cpu(reap_work, cpu);

	/*
	 * When this gets called from do_initcalls via cpucache_init(),
	 * init_workqueues() has already run, so keventd will be setup
	 * at that time.
	 */
	if (keventd_up() && reap_work->work.func == NULL) {
		init_reap_node(cpu);
		INIT_DELAYED_WORK(reap_work, cache_reap);
		schedule_delayed_work_on(cpu, reap_work,
					__round_jiffies_relative(HZ, cpu));
	}
}

static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount)
{
	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
	struct array_cache *nc = NULL;