slub.c
					void (*ctor)(void *))
{
	return flags;
}
#define slub_debug 0

static inline unsigned long slabs_node(struct kmem_cache *s, int node)
							{ return 0; }
static inline void inc_slabs_node(struct kmem_cache *s, int node,
							int objects) {}
static inline void dec_slabs_node(struct kmem_cache *s, int node,
							int objects) {}
#endif

/*
 * Slab allocation and freeing
 */
static inline struct page *alloc_slab_page(gfp_t flags, int node,
					struct kmem_cache_order_objects oo)
{
	int order = oo_order(oo);

	if (node == -1)
		return alloc_pages(flags, order);
	else
		return alloc_pages_node(node, flags, order);
}

static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
	struct page *page;
	struct kmem_cache_order_objects oo = s->oo;

	flags |= s->allocflags;

	page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node, oo);
	if (unlikely(!page)) {
		oo = s->min;
		/*
		 * Allocation may have failed due to fragmentation.
		 * Try a lower order alloc if possible
		 */
		page = alloc_slab_page(flags, node, oo);
		if (!page)
			return NULL;

		stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
	}
	page->objects = oo_objects(oo);
	mod_zone_page_state(page_zone(page),
		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
		1 << oo_order(oo));

	return page;
}

static void setup_object(struct kmem_cache *s, struct page *page,
				void *object)
{
	setup_object_debug(s, page, object);
	if (unlikely(s->ctor))
		s->ctor(object);
}

static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
	struct page *page;
	void *start;
	void *last;
	void *p;

	BUG_ON(flags & GFP_SLAB_BUG_MASK);

	page = allocate_slab(s,
		flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
	if (!page)
		goto out;

	inc_slabs_node(s, page_to_nid(page), page->objects);
	page->slab = s;
	page->flags |= 1 << PG_slab;
	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
			SLAB_STORE_USER | SLAB_TRACE))
		__SetPageSlubDebug(page);

	start = page_address(page);

	if (unlikely(s->flags & SLAB_POISON))
		memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));

	last = start;
	for_each_object(p, s, start, page->objects) {
		setup_object(s, page, last);
		set_freepointer(s, last, p);
		last = p;
	}
	setup_object(s, page, last);
	set_freepointer(s, last, NULL);

	page->freelist = start;
	page->inuse = 0;
out:
	return page;
}

static void __free_slab(struct kmem_cache *s, struct page *page)
{
	int order = compound_order(page);
	int pages = 1 << order;

	if (unlikely(SLABDEBUG && PageSlubDebug(page))) {
		void *p;

		slab_pad_check(s, page);
		for_each_object(p, s, page_address(page), page->objects)
			check_object(s, page, p, 0);
		__ClearPageSlubDebug(page);
	}

	mod_zone_page_state(page_zone(page),
		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
		-pages);

	__ClearPageSlab(page);
	reset_page_mapcount(page);
	__free_pages(page, order);
}

static void rcu_free_slab(struct rcu_head *h)
{
	struct page *page;

	page = container_of((struct list_head *)h, struct page, lru);
	__free_slab(page->slab, page);
}

static void free_slab(struct kmem_cache *s, struct page *page)
{
	if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
		/*
		 * RCU free overloads the RCU head over the LRU
		 */
		struct rcu_head *head = (void *)&page->lru;

		call_rcu(head, rcu_free_slab);
	} else
		__free_slab(s, page);
}

static void discard_slab(struct kmem_cache *s, struct page *page)
{
	dec_slabs_node(s, page_to_nid(page), page->objects);
	free_slab(s, page);
}

/*
 * Per slab locking using the pagelock
 */
static __always_inline void slab_lock(struct page *page)
{
	bit_spin_lock(PG_locked, &page->flags);
}

static __always_inline void slab_unlock(struct page *page)
{
	__bit_spin_unlock(PG_locked, &page->flags);
}

static __always_inline int slab_trylock(struct page *page)
{
	int rc = 1;

	rc = bit_spin_trylock(PG_locked, &page->flags);
	return rc;
}

/*
 * Management of partially allocated slabs
 */
static void add_partial(struct kmem_cache_node *n,
				struct page *page, int tail)
{
	spin_lock(&n->list_lock);
	n->nr_partial++;
	if (tail)
		list_add_tail(&page->lru, &n->partial);
	else
		list_add(&page->lru, &n->partial);
	spin_unlock(&n->list_lock);
}

static void remove_partial(struct kmem_cache *s, struct page *page)
{
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));

	spin_lock(&n->list_lock);
	list_del(&page->lru);
	n->nr_partial--;
	spin_unlock(&n->list_lock);
}

/*
 * Lock slab and remove from the partial list.
 *
 * Must hold list_lock.
 */
static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
							struct page *page)
{
	if (slab_trylock(page)) {
		list_del(&page->lru);
		n->nr_partial--;
		__SetPageSlubFrozen(page);
		return 1;
	}
	return 0;
}

/*
 * Try to allocate a partial slab from a specific node.
 */
static struct page *get_partial_node(struct kmem_cache_node *n)
{
	struct page *page;

	/*
	 * Racy check. If we mistakenly see no partial slabs then we
	 * just allocate an empty slab. If we mistakenly try to get a
	 * partial slab and there is none available then get_partials()
	 * will return NULL.
	 */
	if (!n || !n->nr_partial)
		return NULL;

	spin_lock(&n->list_lock);
	list_for_each_entry(page, &n->partial, lru)
		if (lock_and_freeze_slab(n, page))
			goto out;
	page = NULL;
out:
	spin_unlock(&n->list_lock);
	return page;
}

/*
 * Get a page from somewhere. Search in increasing NUMA distances.
 */
static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
{
#ifdef CONFIG_NUMA
	struct zonelist *zonelist;
	struct zoneref *z;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(flags);
	struct page *page;

	/*
	 * The defrag ratio allows a configuration of the tradeoffs between
	 * inter node defragmentation and node local allocations. A lower
	 * defrag_ratio increases the tendency to do local allocations
	 * instead of attempting to obtain partial slabs from other nodes.
	 *
	 * If the defrag_ratio is set to 0 then kmalloc() always
	 * returns node local objects. If the ratio is higher then kmalloc()
	 * may return off node objects because partial slabs are obtained
	 * from other nodes and filled up.
	 *
	 * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
	 * defrag_ratio = 1000) then every (well almost) allocation will
	 * first attempt to defrag slab caches on other nodes. This means
	 * scanning over all nodes to look for partial slabs which may be
	 * expensive if we do it every time we are trying to find a slab
	 * with available objects.
	 */
	if (!s->remote_node_defrag_ratio ||
			get_cycles() % 1024 > s->remote_node_defrag_ratio)
		return NULL;

	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
		struct kmem_cache_node *n;

		n = get_node(s, zone_to_nid(zone));

		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
				n->nr_partial > n->min_partial) {
			page = get_partial_node(n);
			if (page)
				return page;
		}
	}
#endif
	return NULL;
}

/*
 * Get a partial page, lock it and return it.
 */
static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
{
	struct page *page;
	int searchnode = (node == -1) ? numa_node_id() : node;

	page = get_partial_node(get_node(s, searchnode));
	if (page || (flags & __GFP_THISNODE))
		return page;

	return get_any_partial(s, flags);
}

/*
 * Move a page back to the lists.
 *
 * Must be called with the slab lock held.
 *
 * On exit the slab lock will have been dropped.
 */
static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
{
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
	struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());

	__ClearPageSlubFrozen(page);
	if (page->inuse) {

		if (page->freelist) {
			add_partial(n, page, tail);
			stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
		} else {
			stat(c, DEACTIVATE_FULL);
			if (SLABDEBUG && PageSlubDebug(page) &&
					(s->flags & SLAB_STORE_USER))
				add_full(n, page);
		}
		slab_unlock(page);
	} else {
		stat(c, DEACTIVATE_EMPTY);
		if (n->nr_partial < n->min_partial) {
			/*
			 * Adding an empty slab to the partial slabs in order
			 * to avoid page allocator overhead. This slab needs
			 * to come after the other slabs with objects in
			 * so that the others get filled first. That way the
			 * size of the partial list stays small.
			 *
			 * kmem_cache_shrink can reclaim any empty slabs from
			 * the partial list.
			 */
			add_partial(n, page, 1);
			slab_unlock(page);
		} else {
			slab_unlock(page);
			stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
			discard_slab(s, page);
		}
	}
}

/*
 * Remove the cpu slab
 */
static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
	struct page *page = c->page;
	int tail = 1;

	if (page->freelist)
		stat(c, DEACTIVATE_REMOTE_FREES);
	/*
	 * Merge cpu freelist into slab freelist. Typically we get here
	 * because both freelists are empty. So this is unlikely
	 * to occur.
	 */
	while (unlikely(c->freelist)) {
		void **object;

		tail = 0;	/* Hot objects. Put the slab first */

		/* Retrieve object from cpu_freelist */
		object = c->freelist;
		c->freelist = c->freelist[c->offset];

		/* And put onto the regular freelist */
		object[c->offset] = page->freelist;
		page->freelist = object;
		page->inuse--;
	}
	c->page = NULL;
	unfreeze_slab(s, page, tail);
}

static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
	stat(c, CPUSLAB_FLUSH);
	slab_lock(c->page);
	deactivate_slab(s, c);
}

/*
 * Flush cpu slab.
 *
 * Called from IPI handler with interrupts disabled.
 */
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{
	struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);

	if (likely(c && c->page))
		flush_slab(s, c);
}

static void flush_cpu_slab(void *d)
{
	struct kmem_cache *s = d;

	__flush_cpu_slab(s, smp_processor_id());
}

static void flush_all(struct kmem_cache *s)
{
	on_each_cpu(flush_cpu_slab, s, 1);
}

/*
 * Check if the objects in a per cpu structure fit numa
 * locality expectations.
 */
static inline int node_match(struct kmem_cache_cpu *c, int node)
{
#ifdef CONFIG_NUMA
	if (node != -1 && c->node != node)
		return 0;
#endif
	return 1;
}

/*
 * Slow path. The lockless freelist is empty or we need to perform
 * debugging duties.
 *
 * Interrupts are disabled.
 *
 * Processing is still very fast if new objects have been freed to the
 * regular freelist. In that case we simply take over the regular freelist
 * as the lockless freelist and zap the regular freelist.
 *
 * If that is not working then we fall back to the partial lists. We take the
 * first element of the freelist as the object to allocate now and move the
 * rest of the freelist to the lockless freelist.
 *
 * And if we were unable to get a new slab from the partial slab lists then
 * we need to allocate a new slab. This is the slowest path since it involves
 * a call to the page allocator and the setup of a new slab.
 */
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
{
	void **object;
	struct page *new;

	/* We handle __GFP_ZERO in the caller */
	gfpflags &= ~__GFP_ZERO;

	if (!c->page)
		goto new_slab;

	slab_lock(c->page);
	if (unlikely(!node_match(c, node)))
		goto another_slab;

	stat(c, ALLOC_REFILL);

load_freelist:
	object = c->page->freelist;
	if (unlikely(!object))
		goto another_slab;
	if (unlikely(SLABDEBUG && PageSlubDebug(c->page)))
		goto debug;

	c->freelist = object[c->offset];
	c->page->inuse = c->page->objects;
	c->page->freelist = NULL;
	c->node = page_to_nid(c->page);
unlock_out:
	slab_unlock(c->page);
	stat(c, ALLOC_SLOWPATH);
	return object;

another_slab:
	deactivate_slab(s, c);

new_slab:
	new = get_partial(s, gfpflags, node);
	if (new) {
		c->page = new;
		stat(c, ALLOC_FROM_PARTIAL);
		goto load_freelist;
	}

	if (gfpflags & __GFP_WAIT)
		local_irq_enable();

	new = new_slab(s, gfpflags, node);

	if (gfpflags & __GFP_WAIT)
		local_irq_disable();

	if (new) {
		c = get_cpu_slab(s, smp_processor_id());
		stat(c, ALLOC_SLAB);
		if (c->page)
			flush_slab(s, c);
		slab_lock(new);
		__SetPageSlubFrozen(new);
		c->page = new;
		goto load_freelist;
	}
	return NULL;
debug:
	if (!alloc_debug_processing(s, c->page, object, addr))
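A note on the data structure behind the listing above: new_slab() uses set_freepointer() to chain every object of a slab page into a singly linked freelist whose links are stored *inside* the free objects themselves (at byte offset c->offset), and __slab_alloc()/deactivate_slab() pop and push objects by following object[c->offset]. The following is a minimal userspace sketch of that free-pointer-in-object scheme, not kernel code: all names here (toy_slab, toy_slab_init, toy_alloc, toy_free) are invented for illustration, and the per-CPU lockless freelist, locking, and debug paths of SLUB are deliberately omitted.

/*
 * Toy model of SLUB's freelist layout: each free object stores the address
 * of the next free object inside itself, at byte `offset`.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_slab {
	void *memory;		/* backing storage, like page_address(page) */
	void *freelist;		/* first free object, like page->freelist   */
	size_t size;		/* object size                              */
	size_t offset;		/* where the free pointer lives in a free object */
};

/* set_freepointer()-style helpers: read/write the link stored in the object */
static void set_free_pointer(struct toy_slab *s, void *object, void *next)
{
	memcpy((char *)object + s->offset, &next, sizeof(next));
}

static void *get_free_pointer(struct toy_slab *s, void *object)
{
	void *next;

	memcpy(&next, (char *)object + s->offset, sizeof(next));
	return next;
}

/* Mirrors the loop in new_slab(): chain all objects into one freelist */
static void toy_slab_init(struct toy_slab *s, void *mem, size_t size, int objects)
{
	char *p = mem;
	int i;

	s->memory = mem;
	s->size = size;
	s->offset = 0;	/* SLUB also uses 0 unless a ctor/debugging forces otherwise */
	for (i = 0; i < objects - 1; i++)
		set_free_pointer(s, p + i * size, p + (i + 1) * size);
	set_free_pointer(s, p + (objects - 1) * size, NULL);
	s->freelist = mem;
}

/* Pop the head, like `object = freelist; freelist = object[offset]` above */
static void *toy_alloc(struct toy_slab *s)
{
	void *object = s->freelist;

	if (object)
		s->freelist = get_free_pointer(s, object);
	return object;
}

/* Push back on the head, like the merge loop in deactivate_slab() */
static void toy_free(struct toy_slab *s, void *object)
{
	set_free_pointer(s, object, s->freelist);
	s->freelist = object;
}

int main(void)
{
	struct toy_slab s;
	char buf[4 * 64];
	void *a, *b;

	toy_slab_init(&s, buf, 64, 4);
	a = toy_alloc(&s);
	b = toy_alloc(&s);
	printf("allocated %p and %p\n", a, b);
	toy_free(&s, a);
	toy_free(&s, b);
	return 0;
}

Because the links live in memory that is free anyway, this costs no extra storage per object; SLUB layers the per-CPU c->freelist on top of the same idea so the fast path can pop objects without taking the slab lock.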