📄 slab.c
			(x)->high_mark = (x)->num_active;	\
	} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
#define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)	((x)->node_overflow++)
#define	STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < i)				\
			(x)->max_freeable = i;				\
	} while (0)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_ADD_REAPED(x,y)	do { } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#define	STATS_INC_NODEALLOCS(x)	do { } while (0)
#define	STATS_INC_NODEFREES(x)	do { } while (0)
#define STATS_INC_ACOVERFLOW(x)	do { } while (0)
#define	STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif

#if DEBUG

/*
 * memory layout of objects:
 * 0		: objp
 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
 * 		the end of an object is aligned with the end of the real
 * 		allocation. Catches writes behind the end of the allocation.
 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
 * 		redzone word.
 * cachep->obj_offset: The real object.
 * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
 * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address
 *					[BYTES_PER_WORD long]
 */
static int obj_offset(struct kmem_cache *cachep)
{
	return cachep->obj_offset;
}

static int obj_size(struct kmem_cache *cachep)
{
	return cachep->obj_size;
}

static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	return (unsigned long long*) (objp + obj_offset(cachep) -
				      sizeof(unsigned long long));
}

static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	if (cachep->flags & SLAB_STORE_USER)
		return (unsigned long long *)(objp + cachep->buffer_size -
					      sizeof(unsigned long long) -
					      REDZONE_ALIGN);
	return (unsigned long long *) (objp + cachep->buffer_size -
				       sizeof(unsigned long long));
}

static void **dbg_userword(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_STORE_USER));
	return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
}

#else

#define obj_offset(x)			0
#define obj_size(cachep)		(cachep->buffer_size)
#define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})

#endif

/*
 * Do not go above this order unless 0 objects fit into the slab.
 */
#define	BREAK_GFP_ORDER_HI	1
#define	BREAK_GFP_ORDER_LO	0
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;

/*
 * Functions for storing/retrieving the cachep and or slab from the page
 * allocator.  These are used to find the slab an obj belongs to.  With kfree(),
 * these are used to find the cache which an obj belongs to.
 */
static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
{
	page->lru.next = (struct list_head *)cache;
}

static inline struct kmem_cache *page_get_cache(struct page *page)
{
	page = compound_head(page);
	BUG_ON(!PageSlab(page));
	return (struct kmem_cache *)page->lru.next;
}

static inline void page_set_slab(struct page *page, struct slab *slab)
{
	page->lru.prev = (struct list_head *)slab;
}

static inline struct slab *page_get_slab(struct page *page)
{
	BUG_ON(!PageSlab(page));
	return (struct slab *)page->lru.prev;
}

static inline struct kmem_cache *virt_to_cache(const void *obj)
{
	struct page *page = virt_to_head_page(obj);
	return page_get_cache(page);
}

static inline struct slab *virt_to_slab(const void *obj)
{
	struct page *page = virt_to_head_page(obj);
	return page_get_slab(page);
}

static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
				 unsigned int idx)
{
	return slab->s_mem + cache->buffer_size * idx;
}

/*
 * We want to avoid an expensive divide : (offset / cache->buffer_size)
 *   Using the fact that buffer_size is a constant for a particular cache,
 *   we can replace (offset / cache->buffer_size) by
 *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
 */
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
					const struct slab *slab, void *obj)
{
	u32 offset = (obj - slab->s_mem);
	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
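/*
 * Illustration (hypothetical numbers): for a cache with buffer_size = 256,
 * an object at offset 1024 from slab->s_mem has index 1024 / 256 = 4.
 * reciprocal_divide() computes the same result from the precomputed
 * cache->reciprocal_buffer_size with a multiply and a shift instead of a
 * hardware divide, and index_to_obj() is the inverse mapping:
 * s_mem + 256 * 4 gives the object back.
 */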
/*
 * These are the default caches for kmalloc. Custom caches can have other sizes.
 */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
	CACHE(ULONG_MAX)
#undef CACHE
};
EXPORT_SYMBOL(malloc_sizes);

/* Must match cache_sizes above. Out of line to keep cache footprint low. */
struct cache_names {
	char *name;
	char *name_dma;
};

static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
	{NULL,}
#undef CACHE
};

static struct arraycache_init initarray_cache __initdata =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
static struct arraycache_init initarray_generic =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };

/* internal cache of cache description objs */
static struct kmem_cache cache_cache = {
	.batchcount = 1,
	.limit = BOOT_CPUCACHE_ENTRIES,
	.shared = 1,
	.buffer_size = sizeof(struct kmem_cache),
	.name = "kmem_cache",
};

#define BAD_ALIEN_MAGIC 0x01020304ul

#ifdef CONFIG_LOCKDEP

/*
 * Slab sometimes uses the kmalloc slabs to store the slab headers
 * for other slabs "off slab".
 * The locking for this is tricky in that it nests within the locks
 * of all other slabs in a few places; to deal with this special
 * locking we put on-slab caches into a separate lock-class.
 *
 * We set lock class for alien array caches which are up during init.
 * The lock annotation will be lost if all cpus of a node goes down and
 * then comes back up during hotplug
 */
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;

static inline void init_lock_keys(void)
{
	int q;
	struct cache_sizes *s = malloc_sizes;

	while (s->cs_size != ULONG_MAX) {
		for_each_node(q) {
			struct array_cache **alc;
			int r;
			struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
			if (!l3 || OFF_SLAB(s->cs_cachep))
				continue;
			lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
			alc = l3->alien;
			/*
			 * FIXME: This check for BAD_ALIEN_MAGIC
			 * should go away when common slab code is taught to
			 * work even without alien caches.
			 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
			 * for alloc_alien_cache,
			 */
			if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
				continue;
			for_each_node(r) {
				if (alc[r])
					lockdep_set_class(&alc[r]->lock,
					     &on_slab_alc_key);
			}
		}
		s++;
	}
}
#else
static inline void init_lock_keys(void)
{
}
#endif

/*
 * Guard access to the cache-chain.
 */
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;

/*
 * chicken and egg problem: delay the per-cpu array allocation
 * until the general caches are up.
 */
static enum {
	NONE,
	PARTIAL_AC,
	PARTIAL_L3,
	FULL
} g_cpucache_up;

/*
 * used by boot code to determine if it can use slab based allocator
 */
int slab_is_available(void)
{
	return g_cpucache_up == FULL;
}

static DEFINE_PER_CPU(struct delayed_work, reap_work);

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
	return cachep->array[smp_processor_id()];
}

static inline struct kmem_cache *__find_general_cachep(size_t size,
							gfp_t gfpflags)
{
	struct cache_sizes *csizep = malloc_sizes;

#if DEBUG
	/* This happens if someone tries to call
	 * kmem_cache_create(), or __kmalloc(), before
	 * the generic caches are initialized.
	 */
	BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
#endif
	if (!size)
		return ZERO_SIZE_PTR;

	while (size > csizep->cs_size)
		csizep++;

	/*
	 * Really subtle: The last entry with cs->cs_size==ULONG_MAX
	 * has cs_{dma,}cachep==NULL. Thus no special case
	 * for large kmalloc calls required.
	 */
#ifdef CONFIG_ZONE_DMA
	if (unlikely(gfpflags & GFP_DMA))
		return csizep->cs_dmacachep;
#endif
	return csizep->cs_cachep;
}

static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
{
	return __find_general_cachep(size, gfpflags);
}
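/*
 * Illustration (assuming a typical 4K-page configuration of
 * kmalloc_sizes.h): __find_general_cachep(100, GFP_KERNEL) walks
 * malloc_sizes[] until cs_size >= 100 and returns the "size-128"
 * general cache; the same request with GFP_DMA (and CONFIG_ZONE_DMA)
 * would pick "size-128(DMA)" instead.
 */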
static size_t slab_mgmt_size(size_t nr_objs, size_t align)
{
	return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
}

/*
 * Calculate the number of objects and left-over bytes for a given buffer size.
 */
static void cache_estimate(unsigned long gfporder, size_t buffer_size,
			   size_t align, int flags, size_t *left_over,
			   unsigned int *num)
{
	int nr_objs;
	size_t mgmt_size;
	size_t slab_size = PAGE_SIZE << gfporder;

	/*
	 * The slab management structure can be either off the slab or
	 * on it. For the latter case, the memory allocated for a
	 * slab is used for:
	 *
	 * - The struct slab
	 * - One kmem_bufctl_t for each object
	 * - Padding to respect alignment of @align
	 * - @buffer_size bytes for each object
	 *
	 * If the slab management structure is off the slab, then the
	 * alignment will already be calculated into the size. Because
	 * the slabs are all pages aligned, the objects will be at the
	 * correct alignment when allocated.
	 */
	if (flags & CFLGS_OFF_SLAB) {
		mgmt_size = 0;
		nr_objs = slab_size / buffer_size;

		if (nr_objs > SLAB_LIMIT)
			nr_objs = SLAB_LIMIT;
	} else {
		/*
		 * Ignore padding for the initial guess. The padding
		 * is at most @align-1 bytes, and @buffer_size is at
		 * least @align. In the worst case, this result will
		 * be one greater than the number of objects that fit
		 * into the memory allocation when taking the padding
		 * into account.
		 */
		nr_objs = (slab_size - sizeof(struct slab)) /
			  (buffer_size + sizeof(kmem_bufctl_t));

		/*
		 * This calculated number will be either the right
		 * amount, or one greater than what we want.
		 */
		if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
		       > slab_size)
			nr_objs--;

		if (nr_objs > SLAB_LIMIT)
			nr_objs = SLAB_LIMIT;

		mgmt_size = slab_mgmt_size(nr_objs, align);
	}
	*num = nr_objs;
	*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
}
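/*
 * Worked example (hypothetical sizes): with 4K pages, gfporder = 0,
 * buffer_size = 256, on-slab management, a 32-byte struct slab,
 * 4-byte kmem_bufctl_t and align = 8, the initial guess is
 * (4096 - 32) / (256 + 4) = 15 objects.  slab_mgmt_size(15, 8) =
 * ALIGN(32 + 15*4, 8) = 96 and 96 + 15*256 = 3936 <= 4096, so the
 * guess stands: *num = 15 and *left_over = 4096 - 3840 - 96 = 160.
 */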
#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)

static void __slab_error(const char *function, struct kmem_cache *cachep,
			char *msg)
{
	printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
	       function, cachep->name, msg);
	dump_stack();
}

/*
 * By default on NUMA we use alien caches to stage the freeing of
 * objects allocated from other nodes. This causes massive memory
 * inefficiencies when using fake NUMA setup to split memory into a
 * large number of small nodes, so it can be disabled on the command
 * line
 */
static int use_alien_caches __read_mostly = 1;
static int numa_platform __read_mostly = 1;
static int __init noaliencache_setup(char *s)
{
	use_alien_caches = 0;
	return 1;
}
__setup("noaliencache", noaliencache_setup);

#ifdef CONFIG_NUMA
/*
 * Special reaping functions for NUMA systems called from cache_reap().
 * These take care of doing round robin flushing of alien caches (containing
 * objects freed on different nodes from which they were allocated) and the
 * flushing of remote pcps by calling drain_node_pages.
 */
static DEFINE_PER_CPU(unsigned long, reap_node);

static void init_reap_node(int cpu)
{
	int node;

	node = next_node(cpu_to_node(cpu), node_online_map);
	if (node == MAX_NUMNODES)
		node = first_node(node_online_map);

	per_cpu(reap_node, cpu) = node;
}

static void next_reap_node(void)
{
	int node = __get_cpu_var(reap_node);

	node = next_node(node, node_online_map);
	if (unlikely(node >= MAX_NUMNODES))
		node = first_node(node_online_map);
	__get_cpu_var(reap_node) = node;
}

#else
#define init_reap_node(cpu) do { } while (0)
#define next_reap_node(void) do { } while (0)
#endif

/*
 * Initiate the reap timer running on the target CPU.  We run at around 1 to 2Hz
 * via the workqueue/eventd.
 * Add the CPU number into the expiration time to minimize the possibility of
 * the CPUs getting into lockstep and contending for the global cache chain
 * lock.
 */
static void __cpuinit start_cpu_timer(int cpu)
{
	struct delayed_work *reap_work = &per_cpu(reap_work, cpu);

	/*
	 * When this gets called from do_initcalls via cpucache_init(),
	 * init_workqueues() has already run, so keventd will be setup
	 * at that time.
	 */
	if (keventd_up() && reap_work->work.func == NULL) {
		init_reap_node(cpu);
		INIT_DELAYED_WORK(reap_work, cache_reap);
		schedule_delayed_work_on(cpu, reap_work,
					__round_jiffies_relative(HZ, cpu));
	}
}

static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount)
{
	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
	struct array_cache *nc = NULL;