page_alloc.c
			zlc_active = 1;
			did_zlc_setup = 1;
		}
	}

	if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
		/* Disable zlc cache for second zonelist scan */
		zlc_active = 0;
		goto zonelist_scan;
	}
	return page;
}

/*
 * This is the 'heart' of the zoned buddy allocator.
 */
struct page *
__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
			struct zonelist *zonelist, nodemask_t *nodemask)
{
	const gfp_t wait = gfp_mask & __GFP_WAIT;
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	struct zoneref *z;
	struct zone *zone;
	struct page *page;
	struct reclaim_state reclaim_state;
	struct task_struct *p = current;
	int do_retry;
	int alloc_flags;
	unsigned long did_some_progress;
	unsigned long pages_reclaimed = 0;

	might_sleep_if(wait);

	if (should_fail_alloc_page(gfp_mask, order))
		return NULL;

restart:
	z = zonelist->_zonerefs;  /* the list of zones suitable for gfp_mask */

	if (unlikely(!z->zone)) {
		/*
		 * Happens if we have an empty zonelist as a result of
		 * GFP_THISNODE being used on a memoryless node
		 */
		return NULL;
	}

	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
	if (page)
		goto got_pg;

	/*
	 * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
	 * __GFP_NOWARN set) should not cause reclaim since the subsystem
	 * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim
	 * using a larger set of nodes after it has established that the
	 * allowed per node queues are empty and that nodes are
	 * over allocated.
	 */
	if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
		goto nopage;

	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
		wakeup_kswapd(zone, order);

	/*
	 * OK, we're below the kswapd watermark and have kicked background
	 * reclaim. Now things get more complex, so set up alloc_flags according
	 * to how we want to proceed.
	 *
	 * The caller may dip into page reserves a bit more if the caller
	 * cannot run direct reclaim, or if the caller has realtime scheduling
	 * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
	 * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
	 */
	alloc_flags = ALLOC_WMARK_MIN;
	if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
		alloc_flags |= ALLOC_HARDER;
	if (gfp_mask & __GFP_HIGH)
		alloc_flags |= ALLOC_HIGH;
	if (wait)
		alloc_flags |= ALLOC_CPUSET;

	/*
	 * Go through the zonelist again. Let __GFP_HIGH and allocations
	 * coming from realtime tasks go deeper into reserves.
	 *
	 * This is the last chance, in general, before the goto nopage.
	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
	 */
	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
						high_zoneidx, alloc_flags);
	if (page)
		goto got_pg;

	/* This allocation should allow future memory freeing. */
rebalance:
	if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
			&& !in_interrupt()) {
		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
nofail_alloc:
			/* go through the zonelist yet again, ignoring mins */
			page = get_page_from_freelist(gfp_mask, nodemask, order,
				zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
			if (page)
				goto got_pg;
			if (gfp_mask & __GFP_NOFAIL) {
				congestion_wait(WRITE, HZ/50);
				goto nofail_alloc;
			}
		}
		goto nopage;
	}

	/* Atomic allocations - we can't balance anything */
	if (!wait)
		goto nopage;

	cond_resched();

	/* We now go into synchronous reclaim */
	cpuset_memory_pressure_bump();
	/*
	 * The task's cpuset might have expanded its set of allowable nodes
	 */
	cpuset_update_task_memory_state();
	p->flags |= PF_MEMALLOC;
	reclaim_state.reclaimed_slab = 0;
	p->reclaim_state = &reclaim_state;

	did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);

	p->reclaim_state = NULL;
	p->flags &= ~PF_MEMALLOC;

	cond_resched();

	if (order != 0)
		drain_all_pages();

	if (likely(did_some_progress)) {
		page = get_page_from_freelist(gfp_mask, nodemask, order,
					zonelist, high_zoneidx, alloc_flags);
		if (page)
			goto got_pg;
	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
		if (!try_set_zone_oom(zonelist, gfp_mask)) {
			schedule_timeout_uninterruptible(1);
			goto restart;
		}

		/*
		 * Go through the zonelist yet one more time, keep
		 * very high watermark here, this is only to catch
		 * a parallel oom killing, we must fail if we're still
		 * under heavy pressure.
		 */
		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
			order, zonelist, high_zoneidx,
			ALLOC_WMARK_HIGH|ALLOC_CPUSET);
		if (page) {
			clear_zonelist_oom(zonelist, gfp_mask);
			goto got_pg;
		}

		/* The OOM killer will not help higher order allocs so fail */
		if (order > PAGE_ALLOC_COSTLY_ORDER) {
			clear_zonelist_oom(zonelist, gfp_mask);
			goto nopage;
		}

		out_of_memory(zonelist, gfp_mask, order);
		clear_zonelist_oom(zonelist, gfp_mask);
		goto restart;
	}

	/*
	 * Don't let big-order allocations loop unless the caller explicitly
	 * requests that.  Wait for some write requests to complete then retry.
	 *
	 * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER
	 * means __GFP_NOFAIL, but that may not be true in other
	 * implementations.
	 *
	 * For order > PAGE_ALLOC_COSTLY_ORDER, if __GFP_REPEAT is
	 * specified, then we retry until we no longer reclaim any pages
	 * (above), or we've reclaimed an order of pages at least as
	 * large as the allocation's order. In both cases, if the
	 * allocation still fails, we stop retrying.
	 */
	pages_reclaimed += did_some_progress;
	do_retry = 0;
	if (!(gfp_mask & __GFP_NORETRY)) {
		if (order <= PAGE_ALLOC_COSTLY_ORDER) {
			do_retry = 1;
		} else {
			if (gfp_mask & __GFP_REPEAT &&
					pages_reclaimed < (1 << order))
				do_retry = 1;
		}
		if (gfp_mask & __GFP_NOFAIL)
			do_retry = 1;
	}
	if (do_retry) {
		congestion_wait(WRITE, HZ/50);
		goto rebalance;
	}

nopage:
	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
		printk(KERN_WARNING "%s: page allocation failure."
			" order:%d, mode:0x%x\n",
			p->comm, order, gfp_mask);
		dump_stack();
		show_mem();
	}
got_pg:
	return page;
}
EXPORT_SYMBOL(__alloc_pages_internal);
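The retry policy at the tail of the slow path is the part most worth restating. Below is a minimal user-space sketch of the same decision; the flag values and the costly-order cutoff are local stand-in constants for illustration, not the kernel's real definitions from include/linux/gfp.h.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in values for illustration only. */
#define GFP_NORETRY  0x1000u
#define GFP_REPEAT   0x0400u
#define GFP_NOFAIL   0x0800u
#define COSTLY_ORDER 3

/* Mirrors the do_retry computation above: small orders retry
 * indefinitely, costly orders retry under __GFP_REPEAT until an
 * order's worth of pages has been reclaimed, and __GFP_NOFAIL
 * forces a retry regardless of order. */
static bool should_retry(unsigned int gfp_mask, unsigned int order,
			 unsigned long pages_reclaimed)
{
	if (gfp_mask & GFP_NORETRY)
		return false;
	if (order <= COSTLY_ORDER)
		return true;
	if ((gfp_mask & GFP_REPEAT) && pages_reclaimed < (1UL << order))
		return true;
	return gfp_mask & GFP_NOFAIL;
}

int main(void)
{
	/* An order-4 request with __GFP_REPEAT keeps retrying until
	 * at least 1 << 4 = 16 pages have been reclaimed in total. */
	printf("%d\n", should_retry(GFP_REPEAT, 4, 10)); /* 1 */
	printf("%d\n", should_retry(GFP_REPEAT, 4, 16)); /* 0 */
	return 0;
}

Note that the entire block sits under the !(gfp_mask & __GFP_NORETRY) guard, so __GFP_NORETRY suppresses even a __GFP_NOFAIL retry here.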
/*
 * Common helper functions.
 */
unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *page;

	page = alloc_pages(gfp_mask, order);
	if (!page)
		return 0;
	return (unsigned long) page_address(page);
}
EXPORT_SYMBOL(__get_free_pages);

unsigned long get_zeroed_page(gfp_t gfp_mask)
{
	struct page *page;

	/*
	 * get_zeroed_page() returns a 32-bit address, which cannot represent
	 * a highmem page
	 */
	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);

	page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
	if (page)
		return (unsigned long) page_address(page);
	return 0;
}
EXPORT_SYMBOL(get_zeroed_page);

void __pagevec_free(struct pagevec *pvec)
{
	int i = pagevec_count(pvec);

	while (--i >= 0)
		free_hot_cold_page(pvec->pages[i], pvec->cold);
}

void __free_pages(struct page *page, unsigned int order)
{
	if (put_page_testzero(page)) {
		if (order == 0)
			free_hot_page(page);
		else
			__free_pages_ok(page, order);
	}
}
EXPORT_SYMBOL(__free_pages);

void free_pages(unsigned long addr, unsigned int order)
{
	if (addr != 0) {
		VM_BUG_ON(!virt_addr_valid((void *)addr));
		__free_pages(virt_to_page((void *)addr), order);
	}
}
EXPORT_SYMBOL(free_pages);

/**
 * alloc_pages_exact - allocate an exact number of physically-contiguous pages.
 * @size: the number of bytes to allocate
 * @gfp_mask: GFP flags for the allocation
 *
 * This function is similar to alloc_pages(), except that it allocates the
 * minimum number of pages to satisfy the request.  alloc_pages() can only
 * allocate memory in power-of-two pages.
 *
 * This function is also limited by MAX_ORDER.
 *
 * Memory allocated by this function must be released by free_pages_exact().
 */
void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
{
	unsigned int order = get_order(size);
	unsigned long addr;

	addr = __get_free_pages(gfp_mask, order);
	if (addr) {
		unsigned long alloc_end = addr + (PAGE_SIZE << order);
		unsigned long used = addr + PAGE_ALIGN(size);

		split_page(virt_to_page(addr), order);
		while (used < alloc_end) {
			free_page(used);
			used += PAGE_SIZE;
		}
	}

	return (void *)addr;
}
EXPORT_SYMBOL(alloc_pages_exact);
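alloc_pages_exact() buys a power-of-two block, splits it with split_page(), and immediately frees the tail pages it does not need, so the caller only holds PAGE_ALIGN(size) bytes. A hypothetical module-style caller (sketch only; the 10 KB size and the example_init/example_exit names are illustrative, not from this file):

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/errno.h>

static void *buf;

static int example_init(void)
{
	/* get_order(10 * 1024) is 2 (a 16 KB block on 4 KB pages),
	 * but alloc_pages_exact() frees the unused fourth page, so
	 * only 12 KB stay allocated. */
	buf = alloc_pages_exact(10 * 1024, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	return 0;
}

static void example_exit(void)
{
	/* Must pass the same size that was requested at allocation. */
	free_pages_exact(buf, 10 * 1024);
}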
/**
 * free_pages_exact - release memory allocated via alloc_pages_exact()
 * @virt: the value returned by alloc_pages_exact.
 * @size: size of allocation, same value as passed to alloc_pages_exact().
 *
 * Release the memory allocated by a previous call to alloc_pages_exact.
 */
void free_pages_exact(void *virt, size_t size)
{
	unsigned long addr = (unsigned long)virt;
	unsigned long end = addr + PAGE_ALIGN(size);

	while (addr < end) {
		free_page(addr);
		addr += PAGE_SIZE;
	}
}
EXPORT_SYMBOL(free_pages_exact);

static unsigned int nr_free_zone_pages(int offset)
{
	struct zoneref *z;
	struct zone *zone;

	/* Just pick one node, since fallback list is circular */
	unsigned int sum = 0;

	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);

	for_each_zone_zonelist(zone, z, zonelist, offset) {
		unsigned long size = zone->present_pages;
		unsigned long high = zone->pages_high;
		if (size > high)
			sum += size - high;
	}

	return sum;
}

/*
 * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
 */
unsigned int nr_free_buffer_pages(void)
{
	return nr_free_zone_pages(gfp_zone(GFP_USER));
}
EXPORT_SYMBOL_GPL(nr_free_buffer_pages);

/*
 * Amount of free RAM allocatable within all zones
 */
unsigned int nr_free_pagecache_pages(void)
{
	return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
}

static inline void show_node(struct zone *zone)
{
	if (NUMA_BUILD)
		printk("Node %d ", zone_to_nid(zone));
}

void si_meminfo(struct sysinfo *val)
{
	val->totalram = totalram_pages;
	val->sharedram = 0;
	val->freeram = global_page_state(NR_FREE_PAGES);
	val->bufferram = nr_blockdev_pages();
	val->totalhigh = totalhigh_pages;
	val->freehigh = nr_free_highpages();
	val->mem_unit = PAGE_SIZE;
}
EXPORT_SYMBOL(si_meminfo);

#ifdef CONFIG_NUMA
void si_meminfo_node(struct sysinfo *val, int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);

	val->totalram = pgdat->node_present_pages;
	val->freeram = node_page_state(nid, NR_FREE_PAGES);
#ifdef CONFIG_HIGHMEM
	val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
	val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
			NR_FREE_PAGES);
#else
	val->totalhigh = 0;
	val->freehigh = 0;
#endif
	val->mem_unit = PAGE_SIZE;
}
#endif

#define K(x) ((x) << (PAGE_SHIFT-10))

/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas(void)
{
	int cpu;
	struct zone *zone;

	for_each_zone(zone) {
		if (!populated_zone(zone))
			continue;

		show_node(zone);
		printk("%s per-cpu:\n", zone->name);

		for_each_online_cpu(cpu) {
			struct per_cpu_pageset *pageset;

			pageset = zone_pcp(zone, cpu);

			printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
			       cpu, pageset->pcp.high,
			       pageset->pcp.batch, pageset->pcp.count);
		}
	}

	printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
		" inactive_file:%lu"
//TODO: check/adjust line lengths
#ifdef CONFIG_UNEVICTABLE_LRU
		" unevictable:%lu"
#endif
		" dirty:%lu writeback:%lu unstable:%lu\n"
		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
		global_page_state(NR_ACTIVE_ANON),
		global_page_state(NR_ACTIVE_FILE),
		global_page_state(NR_INACTIVE_ANON),
		global_page_state(NR_INACTIVE_FILE),
#ifdef CONFIG_UNEVICTABLE_LRU
		global_page_state(NR_UNEVICTABLE),
#endif
		global_page_state(NR_FILE_DIRTY),
		global_page_state(NR_WRITEBACK),
		global_page_state(NR_UNSTABLE_NFS),
		global_page_state(NR_FREE_PAGES),
		global_page_state(NR_SLAB_RECLAIMABLE) +
			global_page_state(NR_SLAB_UNRECLAIMABLE),
		global_page_state(NR_FILE_MAPPED),
		global_page_state(NR_PAGETABLE),
		global_page_state(NR_BOUNCE));

	for_each_zone(zone) {
		int i;

		if (!populated_zone(zone))
			continue;

		show_node(zone);
		printk("%s"
			" free:%lukB"
			" min:%lukB"
			" low:%lukB"
			" high:%lukB"
			" active_anon:%lukB"
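One small detail in show_free_areas() worth unpacking is the K() macro used for those "%lukB" fields: shifting a page count left by PAGE_SHIFT - 10 multiplies it by the page size expressed in 1 KB units. A quick user-space check, assuming 4 KB pages (PAGE_SHIFT == 12 is an assumption for this sketch):

#include <stdio.h>

/* Same definition as in the listing, with PAGE_SHIFT assumed
 * to be 12 (4 KB pages) for this illustration. */
#define PAGE_SHIFT 12
#define K(x) ((x) << (PAGE_SHIFT - 10))

int main(void)
{
	/* 25 pages of 4 KB each -> 100 kB, matching the "%lukB"
	 * fields printed by show_free_areas(). */
	unsigned long pages = 25;
	printf("%lukB\n", K(pages)); /* prints 100kB */
	return 0;
}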