📄 slab.c
/*
 * linux/mm/slab.c
 * Written by Mark Hemment, 1996/97.
 * (markhe@nextd.demon.co.uk)
 *
 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
 *
 * Major cleanup, different bufctl logic, per-cpu arrays
 *      (c) 2000 Manfred Spraul
 *
 * An implementation of the Slab Allocator as described in outline in;
 *      UNIX Internals: The New Frontiers by Uresh Vahalia
 *      Pub: Prentice Hall      ISBN 0-13-101908-2
 * or with a little more detail in;
 *      The Slab Allocator: An Object-Caching Kernel Memory Allocator
 *      Jeff Bonwick (Sun Microsystems).
 *      Presented at: USENIX Summer 1994 Technical Conference
 *
 *
 * The memory is organized in caches, one cache for each object type.
 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
 * Each cache consists of many slabs (they are small (usually one
 * page long) and always contiguous), and each slab contains multiple
 * initialized objects.
 *
 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
 * normal). If you need a special memory type, then you must create a new
 * cache for that memory type.
 *
 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
 *   full slabs with 0 free objects
 *   partial slabs
 *   empty slabs with no allocated objects
 *
 * If partial slabs exist, then new allocations come from these slabs,
 * otherwise from empty slabs or new slabs are allocated.
 *
 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
 *
 * On SMP systems, each cache has a short per-cpu head array, most allocs
 * and frees go into that array, and if that array overflows, then 1/2
 * of the entries in the array are given back into the global cache.
 * This reduces the number of spinlock operations.
 *
 * The c_cpuarray may not be read with local interrupts enabled.
 *
 * SMP synchronization:
 *  constructors and destructors are called without any locking.
 *  Several members in kmem_cache_t and slab_t never change, they
 *      are accessed without any locking.
 *  The per-cpu arrays are never accessed from the wrong cpu, no locking.
 *  The non-constant members are protected with a per-cache irq spinlock.
 *
 * Further notes from the original documentation:
 *
 * 11 April '97.  Started multi-threading - markhe
 *      The global cache-chain is protected by the semaphore 'cache_chain_sem'.
 *      The sem is only needed when accessing/extending the cache-chain, which
 *      can never happen inside an interrupt (kmem_cache_create(),
 *      kmem_cache_shrink() and kmem_cache_reap()).
 *
 *      To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
 *      may be sleeping and therefore not holding the semaphore/lock), the
 *      growing field is used.  This also prevents reaping from a cache.
 *
 *      At present, each engine can be growing a cache.  This should be blocked.
 *
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <asm/uaccess.h>
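/*
 * Editor's note: the sketch below is not part of the original slab.c.  It is
 * an illustrative example of how a client typically uses the cache API
 * described in the comment above.  The struct and cache names are made up
 * for the example; the kmem_cache_* calls are the public interface declared
 * in <linux/slab.h>.  Kept inside #if 0 so it is never compiled.
 */
#if 0
struct example_obj {
        int     id;
        char    payload[60];
};

static kmem_cache_t *example_cachep;

static void example_usage(void)
{
        /* One cache per object type; SLAB_HWCACHE_ALIGN requests
         * L1-cache-line alignment of the objects. */
        example_cachep = kmem_cache_create("example_obj",
                                           sizeof(struct example_obj), 0,
                                           SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (example_cachep) {
                struct example_obj *obj;

                /* Usually served from a partial slab (or, on SMP,
                 * straight from the per-cpu head array). */
                obj = kmem_cache_alloc(example_cachep, GFP_KERNEL);
                if (obj) {
                        obj->id = 1;
                        kmem_cache_free(example_cachep, obj);
                }

                /* Only valid once no one can allocate from the cache. */
                kmem_cache_destroy(example_cachep);
        }
}
#endif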
/*
 * DEBUG        - 1 for kmem_cache_create() to honour: SLAB_DEBUG_INITIAL,
 *                SLAB_RED_ZONE & SLAB_POISON.
 *                0 for faster, smaller code (especially in the critical paths).
 *
 * STATS        - 1 to collect stats for /proc/slabinfo.
 *                0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
 */

#ifdef CONFIG_DEBUG_SLAB
#define DEBUG           1
#define STATS           1
#define FORCED_DEBUG    1
#else
#define DEBUG           0
#define STATS           0
#define FORCED_DEBUG    0
#endif

/*
 * Parameters for kmem_cache_reap
 */
#define REAP_SCANLEN    10
#define REAP_PERFECT    10

/* Shouldn't this be in a header file somewhere? */
#define BYTES_PER_WORD          sizeof(void *)

/* Legal flag mask for kmem_cache_create(). */
#if DEBUG
# define CREATE_MASK    (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
                         SLAB_POISON | SLAB_HWCACHE_ALIGN | \
                         SLAB_NO_REAP | SLAB_CACHE_DMA | \
                         SLAB_MUST_HWCACHE_ALIGN)
#else
# define CREATE_MASK    (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
                         SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN)
#endif

/*
 * kmem_bufctl_t:
 *
 * Bufctl's are used for linking objs within a slab
 * linked offsets.
 *
 * This implementation relies on "struct page" for locating the cache &
 * slab an object belongs to.
 * This allows the bufctl structure to be small (one int), but limits
 * the number of objects a slab (not a cache) can contain when off-slab
 * bufctls are used. The limit is the size of the largest general cache
 * that does not use off-slab slabs.
 * For 32bit archs with 4 kB pages, this is 56.
 * This is not serious, as it is only for large objects, when it is unwise
 * to have too many per slab.
 * Note: This limit can be raised by introducing a general cache whose size
 * is less than 512 (PAGE_SIZE<<3), but greater than 256.
 */
#define BUFCTL_END 0xffffFFFF
#define SLAB_LIMIT 0xffffFFFE
typedef unsigned int kmem_bufctl_t;

/* Max number of objs-per-slab for caches which use off-slab slabs.
 * Needed to avoid a possible looping condition in kmem_cache_grow().
 */
static unsigned long offslab_limit;

/*
 * slab_t
 *
 * Manages the objs in a slab. Placed either at the beginning of mem allocated
 * for a slab, or allocated from a general cache.
 * Slabs are chained into three lists: fully used, partial, fully free slabs.
 */
typedef struct slab_s {
        struct list_head        list;
        unsigned long           colouroff;
        void                    *s_mem;         /* including colour offset */
        unsigned int            inuse;          /* num of objs active in slab */
        kmem_bufctl_t           free;
} slab_t;

#define slab_bufctl(slabp) \
        ((kmem_bufctl_t *)(((slab_t*)slabp)+1))

/*
 * cpucache_t
 *
 * Per cpu structures
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 */
typedef struct cpucache_s {
        unsigned int avail;
        unsigned int limit;
} cpucache_t;

#define cc_entry(cpucache) \
        ((void **)(((cpucache_t*)(cpucache))+1))
#define cc_data(cachep) \
        ((cachep)->cpudata[smp_processor_id()])
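/*
 * Editor's note: illustrative sketch, not part of the original file, and it
 * assumes CONFIG_SMP.  It shows the idea behind the per-cpu head array
 * described above: cc_entry() points just past the cpucache_t header, where
 * an array of object pointers is stored, so the common alloc/free path is a
 * simple push/pop with local interrupts disabled and no spinlock.  The
 * function name is made up; the real fast path lives in the alloc/free
 * routines later in this file.
 */
#if 0
static void *example_cpucache_pop(kmem_cache_t *cachep)
{
        cpucache_t *cc = cc_data(cachep);       /* this cpu's head array */
        void *objp = NULL;

        if (cc && cc->avail)
                objp = cc_entry(cc)[--cc->avail];       /* pop newest entry */
        return objp;
}
#endif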
/*
 * kmem_cache_t
 *
 * manages a cache.
 */

#define CACHE_NAMELEN   20      /* max name length for a slab cache */

struct kmem_cache_s {
/* 1) each alloc & free */
        /* full, partial first, then free */
        struct list_head        slabs_full;
        struct list_head        slabs_partial;
        struct list_head        slabs_free;
        unsigned int            objsize;
        unsigned int            flags;  /* constant flags */
        unsigned int            num;    /* # of objs per slab */
        spinlock_t              spinlock;
#ifdef CONFIG_SMP
        unsigned int            batchcount;
#endif

/* 2) slab additions /removals */
        /* order of pgs per slab (2^n) */
        unsigned int            gfporder;

        /* force GFP flags, e.g. GFP_DMA */
        unsigned int            gfpflags;

        size_t                  colour;         /* cache colouring range */
        unsigned int            colour_off;     /* colour offset */
        unsigned int            colour_next;    /* cache colouring */
        kmem_cache_t            *slabp_cache;
        unsigned int            growing;
        unsigned int            dflags;         /* dynamic flags */

        /* constructor func */
        void (*ctor)(void *, kmem_cache_t *, unsigned long);

        /* de-constructor func */
        void (*dtor)(void *, kmem_cache_t *, unsigned long);

        unsigned long           failures;

/* 3) cache creation/removal */
        char                    name[CACHE_NAMELEN];
        struct list_head        next;
#ifdef CONFIG_SMP
/* 4) per-cpu data */
        cpucache_t              *cpudata[NR_CPUS];
#endif
#if STATS
        unsigned long           num_active;
        unsigned long           num_allocations;
        unsigned long           high_mark;
        unsigned long           grown;
        unsigned long           reaped;
        unsigned long           errors;
#ifdef CONFIG_SMP
        atomic_t                allochit;
        atomic_t                allocmiss;
        atomic_t                freehit;
        atomic_t                freemiss;
#endif
#endif
};

/* internal c_flags */
#define CFLGS_OFF_SLAB  0x010000UL      /* slab management in own cache */
#define CFLGS_OPTIMIZE  0x020000UL      /* optimized slab lookup */

/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
#define DFLGS_GROWN     0x000001UL      /* don't reap a recently grown */

#define OFF_SLAB(x)     ((x)->flags & CFLGS_OFF_SLAB)
#define OPTIMIZE(x)     ((x)->flags & CFLGS_OPTIMIZE)
#define GROWN(x)        ((x)->dflags & DFLGS_GROWN)

#if STATS
#define STATS_INC_ACTIVE(x)     ((x)->num_active++)
#define STATS_DEC_ACTIVE(x)     ((x)->num_active--)
#define STATS_INC_ALLOCED(x)    ((x)->num_allocations++)
#define STATS_INC_GROWN(x)      ((x)->grown++)
#define STATS_INC_REAPED(x)     ((x)->reaped++)
#define STATS_SET_HIGH(x)       do { if ((x)->num_active > (x)->high_mark) \
                                        (x)->high_mark = (x)->num_active; \
                                } while (0)
#define STATS_INC_ERR(x)        ((x)->errors++)
#else
#define STATS_INC_ACTIVE(x)     do { } while (0)
#define STATS_DEC_ACTIVE(x)     do { } while (0)
#define STATS_INC_ALLOCED(x)    do { } while (0)
#define STATS_INC_GROWN(x)      do { } while (0)
#define STATS_INC_REAPED(x)     do { } while (0)
#define STATS_SET_HIGH(x)       do { } while (0)
#define STATS_INC_ERR(x)        do { } while (0)
#endif

#if STATS && defined(CONFIG_SMP)
#define STATS_INC_ALLOCHIT(x)   atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)  atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)    atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)   atomic_inc(&(x)->freemiss)
#else
#define STATS_INC_ALLOCHIT(x)   do { } while (0)
#define STATS_INC_ALLOCMISS(x)  do { } while (0)
#define STATS_INC_FREEHIT(x)    do { } while (0)
#define STATS_INC_FREEMISS(x)   do { } while (0)
#endif

#if DEBUG
/* Magic nums for obj red zoning.
 * Placed in the first word before and the first word after an obj.
 */
#define RED_MAGIC1      0x5A2CF071UL    /* when obj is active */
#define RED_MAGIC2      0x170FC2A5UL    /* when obj is inactive */

/* ...and for poisoning */
#define POISON_BYTE     0x5a            /* byte value for poisoning */
#define POISON_END      0xa5            /* end-byte of poisoning */

#endif

/* maximum size of an obj (in 2^order pages) */
#define MAX_OBJ_ORDER   5       /* 32 pages */

/*
 * Do not go above this order unless 0 objects fit into the slab.
 */
#define BREAK_GFP_ORDER_HI      2
#define BREAK_GFP_ORDER_LO      1
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;

/*
 * Absolute limit for the gfp order
 */
#define MAX_GFP_ORDER   5       /* 32 pages */


/* Macros for storing/retrieving the cachep and/or slab from the
 * global 'mem_map'. These are used to find the slab an obj belongs to.
 * With kfree(), these are used to find the cache which an obj belongs to.
 */
#define SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
#define GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
#define SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
#define GET_PAGE_SLAB(pg)     ((slab_t *)(pg)->list.prev)
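/*
 * Editor's note: illustrative sketch, not part of the original file.  It
 * shows how the macros above are meant to be used on the free path: the
 * allocator stamps the cache and slab pointers into page->list.next/prev
 * when a slab is grown, so any object pointer can be mapped back to its
 * cache and slab without a search.  The function name is made up for the
 * example.
 */
#if 0
static void example_obj_lookup(void *objp)
{
        struct page *page = virt_to_page(objp);
        kmem_cache_t *cachep = GET_PAGE_CACHE(page);    /* owning cache */
        slab_t *slabp = GET_PAGE_SLAB(page);            /* owning slab */

        printk("obj %p belongs to cache %s (slab %p)\n",
               objp, cachep->name, slabp);
}
#endif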
/* Size description struct for general caches. */
typedef struct cache_sizes {
        size_t           cs_size;
        kmem_cache_t    *cs_cachep;
        kmem_cache_t    *cs_dmacachep;
} cache_sizes_t;

static cache_sizes_t cache_sizes[] = {
#if PAGE_SIZE == 4096
        {    32,        NULL, NULL},
#endif
        {    64,        NULL, NULL},
        {   128,        NULL, NULL},
        {   256,        NULL, NULL},
        {   512,        NULL, NULL},
        {  1024,        NULL, NULL},
        {  2048,        NULL, NULL},
        {  4096,        NULL, NULL},
        {  8192,        NULL, NULL},
        { 16384,        NULL, NULL},
        { 32768,        NULL, NULL},
        { 65536,        NULL, NULL},
        {131072,        NULL, NULL},
        {     0,        NULL, NULL}
};

/* internal cache of cache description objs */
static kmem_cache_t cache_cache = {
        slabs_full:     LIST_HEAD_INIT(cache_cache.slabs_full),
        slabs_partial:  LIST_HEAD_INIT(cache_cache.slabs_partial),
        slabs_free:     LIST_HEAD_INIT(cache_cache.slabs_free),
        objsize:        sizeof(kmem_cache_t),
        flags:          SLAB_NO_REAP,
        spinlock:       SPIN_LOCK_UNLOCKED,
        colour_off:     L1_CACHE_BYTES,
        name:           "kmem_cache",
};

/* Guard access to the cache-chain. */
static struct semaphore cache_chain_sem;

/* Place maintainer for reaping. */
static kmem_cache_t *clock_searchp = &cache_cache;

#define cache_chain (cache_cache.next)

#ifdef CONFIG_SMP
/*
 * chicken and egg problem: delay the per-cpu array allocation
 * until the general caches are up.
 */
static int g_cpucache_up;

static void enable_cpucache (kmem_cache_t *cachep);
static void enable_all_cpucaches (void);
#endif

/* Calc the num objs, wastage, and bytes left over for a given slab size. */
static void kmem_cache_estimate (unsigned long gfporder, size_t size,
                 int flags, size_t *left_over, unsigned int *num)
{
        int i;
        size_t wastage = PAGE_SIZE<<gfporder;
        size_t extra = 0;
        size_t base = 0;

        if (!(flags & CFLGS_OFF_SLAB)) {
                base = sizeof(slab_t);
                extra = sizeof(kmem_bufctl_t);
        }
        i = 0;
        while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
                i++;
        if (i > 0)
                i--;

        if (i > SLAB_LIMIT)
                i = SLAB_LIMIT;

        *num = i;
        wastage -= i*size;
        wastage -= L1_CACHE_ALIGN(base+i*extra);
        *left_over = wastage;
}
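/*
 * Editor's note: worked example, not in the original source.  Assume a
 * 32-bit arch, PAGE_SIZE = 4096, L1_CACHE_BYTES = 32, on-slab management
 * (base = sizeof(slab_t) = 28, extra = sizeof(kmem_bufctl_t) = 4) and an
 * object size of 100 bytes.  The loop above finds the largest i with
 * i*100 + L1_CACHE_ALIGN(28 + 4*i) <= 4096: i = 39 still fits
 * (3900 + 192 = 4092), i = 40 does not (4000 + 192 = 4192), so the slab
 * holds 39 objects and *left_over = 4096 - 3900 - 192 = 4 bytes, which
 * then feeds the cache-colouring calculation (left_over / colour_off,
 * as in kmem_cache_init() below).
 */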
/* Initialisation - setup the `cache' cache. */
void __init kmem_cache_init(void)
{
        size_t left_over;

        init_MUTEX(&cache_chain_sem);
        INIT_LIST_HEAD(&cache_chain);

        kmem_cache_estimate(0, cache_cache.objsize, 0,
                        &left_over, &cache_cache.num);
        if (!cache_cache.num)
                BUG();

        cache_cache.colour = left_over/cache_cache.colour_off;
        cache_cache.colour_next = 0;
}


/* Initialisation - setup remaining internal and general caches.
 * Called after the gfp() functions have been enabled, and before smp_init().
 */
void __init kmem_cache_sizes_init(void)
{
        cache_sizes_t *sizes = cache_sizes;
        char name[20];
        /*
         * Fragmentation resistance on low memory - only use bigger
         * page orders on machines with more than 32MB of memory.
         */
        if (num_physpages > (32 << 20) >> PAGE_SHIFT)
                slab_break_gfp_order = BREAK_GFP_ORDER_HI;
        do {
                /* For performance, all the general caches are L1 aligned.
                 * This should be particularly beneficial on SMP boxes, as it
                 * eliminates "false sharing".
                 * Note for systems short on memory removing the alignment will
                 * allow tighter packing of the smaller caches.
                 */
                sprintf(name,"size-%Zd",sizes->cs_size);
                if (!(sizes->cs_cachep =
                        kmem_cache_create(name, sizes->cs_size,
                                        0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
                        BUG();
                }

                /* Inc off-slab bufctl limit until the ceiling is hit. */
                if (!(OFF_SLAB(sizes->cs_cachep))) {
                        offslab_limit = sizes->cs_size-sizeof(slab_t);
                        offslab_limit /= 2;
                }
                sprintf(name, "size-%Zd(DMA)",sizes->cs_size);
                sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
                              SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
                if (!sizes->cs_dmacachep)
                        BUG();
                sizes++;
        } while (sizes->cs_size);
}

int __init kmem_cpucache_init(void)
{
#ifdef CONFIG_SMP
        g_cpucache_up = 1;
        enable_all_cpucaches();
#endif
        return 0;
}

__initcall(kmem_cpucache_init);

/* Interface to system's page allocator. No need to hold the cache-lock.
 */
static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
{
        void    *addr;

        /*
         * If we requested dmaable memory, we will get it. Even if we
         * did not request dmaable memory, we might get it, but that
         * would be relatively rare and ignorable.
         */
        flags |= cachep->gfpflags;
        addr = (void*) __get_free_pages(flags, cachep->gfporder);
        /* Assume that now we have the pages no one else can legally
         * mess with the 'struct page's.
         * However vm_scan() might try to test the structure to see if
         * it is a named-page or buffer-page.  The members it tests are
         * of no interest here.....
         */
        return addr;
}

/* Interface to system's page release. */
static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
{
        unsigned long i = (1<<cachep->gfporder);
        struct page *page = virt_to_page(addr);

        /* free_pages() does not clear the type bit - we do that.
         * The pages have been unlinked from their cache-slab,
         * but their 'struct page's might be accessed in
         * vm_scan(). Shouldn't be a worry.
         */
        while (i--) {
                PageClearSlab(page);
                page++;
        }
        free_pages((unsigned long)addr, cachep->gfporder);
}

/* (listing truncated here - the remainder of slab.c is not shown) */