page_alloc.c
			zlc_active = 1;
			did_zlc_setup = 1;
		}
	}

	if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
		/* Disable zlc cache for second zonelist scan */
		zlc_active = 0;
		goto zonelist_scan;
	}
	return page;
}

/*
 * This is the 'heart' of the zoned buddy allocator.
 */
struct page *
__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
			struct zonelist *zonelist, nodemask_t *nodemask)
{
	const gfp_t wait = gfp_mask & __GFP_WAIT;
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	struct zoneref *z;
	struct zone *zone;
	struct page *page;
	struct reclaim_state reclaim_state;
	struct task_struct *p = current;
	int do_retry;
	int alloc_flags;
	unsigned long did_some_progress;
	unsigned long pages_reclaimed = 0;

	might_sleep_if(wait);

	if (should_fail_alloc_page(gfp_mask, order))
		return NULL;

restart:
	z = zonelist->_zonerefs;  /* the list of zones suitable for gfp_mask */

	if (unlikely(!z->zone)) {
		/*
		 * Happens if we have an empty zonelist as a result of
		 * GFP_THISNODE being used on a memoryless node
		 */
		return NULL;
	}

	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
	if (page)
		goto got_pg;

	/*
	 * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
	 * __GFP_NOWARN set) should not cause reclaim since the subsystem
	 * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim
	 * using a larger set of nodes after it has established that the
	 * allowed per node queues are empty and that nodes are
	 * over allocated.
	 */
	if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
		goto nopage;

	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
		wakeup_kswapd(zone, order);

	/*
	 * OK, we're below the kswapd watermark and have kicked background
	 * reclaim. Now things get more complex, so set up alloc_flags according
	 * to how we want to proceed.
	 *
	 * The caller may dip into page reserves a bit more if the caller
	 * cannot run direct reclaim, or if the caller has realtime scheduling
	 * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
	 * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
	 */
	alloc_flags = ALLOC_WMARK_MIN;
	if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
		alloc_flags |= ALLOC_HARDER;
	if (gfp_mask & __GFP_HIGH)
		alloc_flags |= ALLOC_HIGH;
	if (wait)
		alloc_flags |= ALLOC_CPUSET;

	/*
	 * Go through the zonelist again. Let __GFP_HIGH and allocations
	 * coming from realtime tasks go deeper into reserves.
	 *
	 * This is the last chance, in general, before the goto nopage.
	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
	 */
	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
						high_zoneidx, alloc_flags);
	if (page)
		goto got_pg;

	/* This allocation should allow future memory freeing. */
rebalance:
	if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
			&& !in_interrupt()) {
		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
nofail_alloc:
			/* go through the zonelist yet again, ignoring mins */
			page = get_page_from_freelist(gfp_mask, nodemask, order,
				zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
			if (page)
				goto got_pg;
			if (gfp_mask & __GFP_NOFAIL) {
				congestion_wait(WRITE, HZ/50);
				goto nofail_alloc;
			}
		}
		goto nopage;
	}

	/* Atomic allocations - we can't balance anything */
	if (!wait)
		goto nopage;

	cond_resched();

	/* We now go into synchronous reclaim */
	cpuset_memory_pressure_bump();
	/*
	 * The task's cpuset might have expanded its set of allowable nodes
	 */
	cpuset_update_task_memory_state();
	p->flags |= PF_MEMALLOC;
	reclaim_state.reclaimed_slab = 0;
	p->reclaim_state = &reclaim_state;

	did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);

	p->reclaim_state = NULL;
	p->flags &= ~PF_MEMALLOC;

	cond_resched();

	if (order != 0)
		drain_all_pages();

	if (likely(did_some_progress)) {
		page = get_page_from_freelist(gfp_mask, nodemask, order,
					zonelist, high_zoneidx, alloc_flags);
		if (page)
			goto got_pg;
	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
		if (!try_set_zone_oom(zonelist, gfp_mask)) {
			schedule_timeout_uninterruptible(1);
			goto restart;
		}

		/*
		 * Go through the zonelist yet one more time, keep
		 * very high watermark here, this is only to catch
		 * a parallel oom killing, we must fail if we're still
		 * under heavy pressure.
		 */
		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
			order, zonelist, high_zoneidx,
			ALLOC_WMARK_HIGH|ALLOC_CPUSET);
		if (page) {
			clear_zonelist_oom(zonelist, gfp_mask);
			goto got_pg;
		}

		/* The OOM killer will not help higher order allocs so fail */
		if (order > PAGE_ALLOC_COSTLY_ORDER) {
			clear_zonelist_oom(zonelist, gfp_mask);
			goto nopage;
		}

		out_of_memory(zonelist, gfp_mask, order);
		clear_zonelist_oom(zonelist, gfp_mask);
		goto restart;
	}

	/*
	 * Don't let big-order allocations loop unless the caller explicitly
	 * requests that.  Wait for some write requests to complete then retry.
	 *
	 * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER
	 * means __GFP_NOFAIL, but that may not be true in other
	 * implementations.
	 *
	 * For order > PAGE_ALLOC_COSTLY_ORDER, if __GFP_REPEAT is
	 * specified, then we retry until we no longer reclaim any pages
	 * (above), or we've reclaimed an order of pages at least as
	 * large as the allocation's order. In both cases, if the
	 * allocation still fails, we stop retrying.
	 */
	pages_reclaimed += did_some_progress;
	do_retry = 0;
	if (!(gfp_mask & __GFP_NORETRY)) {
		if (order <= PAGE_ALLOC_COSTLY_ORDER) {
			do_retry = 1;
		} else {
			if (gfp_mask & __GFP_REPEAT &&
					pages_reclaimed < (1 << order))
				do_retry = 1;
		}
		if (gfp_mask & __GFP_NOFAIL)
			do_retry = 1;
	}
	if (do_retry) {
		congestion_wait(WRITE, HZ/50);
		goto rebalance;
	}

nopage:
	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
		printk(KERN_WARNING "%s: page allocation failure."
			" order:%d, mode:0x%x\n",
			p->comm, order, gfp_mask);
		dump_stack();
		show_mem();
	}
got_pg:
	return page;
}
EXPORT_SYMBOL(__alloc_pages_internal);
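The retry policy at the tail of the slow path is the part most worth restating. Below is a minimal user-space sketch of the same decision; the flag values and the costly-order cutoff are local stand-in constants for illustration, not the kernel's real definitions from include/linux/gfp.h.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in values for illustration only. */
#define GFP_NORETRY  0x1000u
#define GFP_REPEAT   0x0400u
#define GFP_NOFAIL   0x0800u
#define COSTLY_ORDER 3

/* Mirrors the do_retry computation above: small orders retry
 * indefinitely, costly orders retry under __GFP_REPEAT until an
 * order's worth of pages has been reclaimed, and __GFP_NOFAIL
 * forces a retry regardless of order. */
static bool should_retry(unsigned int gfp_mask, unsigned int order,
			 unsigned long pages_reclaimed)
{
	if (gfp_mask & GFP_NORETRY)
		return false;
	if (order <= COSTLY_ORDER)
		return true;
	if ((gfp_mask & GFP_REPEAT) && pages_reclaimed < (1UL << order))
		return true;
	return gfp_mask & GFP_NOFAIL;
}

int main(void)
{
	/* An order-4 request with __GFP_REPEAT keeps retrying until
	 * at least 1 << 4 = 16 pages have been reclaimed in total. */
	printf("%d\n", should_retry(GFP_REPEAT, 4, 10)); /* 1 */
	printf("%d\n", should_retry(GFP_REPEAT, 4, 16)); /* 0 */
	return 0;
}

Note that the entire block sits under the !(gfp_mask & __GFP_NORETRY) guard, so __GFP_NORETRY suppresses even a __GFP_NOFAIL retry here.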
/*
 * Common helper functions.
 */
unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *page;

	page = alloc_pages(gfp_mask, order);
	if (!page)
		return 0;
	return (unsigned long) page_address(page);
}
EXPORT_SYMBOL(__get_free_pages);

unsigned long get_zeroed_page(gfp_t gfp_mask)
{
	struct page *page;

	/*
	 * get_zeroed_page() returns a 32-bit address, which cannot represent
	 * a highmem page
	 */
	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);

	page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
	if (page)
		return (unsigned long) page_address(page);
	return 0;
}
EXPORT_SYMBOL(get_zeroed_page);

void __pagevec_free(struct pagevec *pvec)
{
	int i = pagevec_count(pvec);

	while (--i >= 0)
		free_hot_cold_page(pvec->pages[i], pvec->cold);
}

void __free_pages(struct page *page, unsigned int order)
{
	if (put_page_testzero(page)) {
		if (order == 0)
			free_hot_page(page);
		else
			__free_pages_ok(page, order);
	}
}
EXPORT_SYMBOL(__free_pages);

void free_pages(unsigned long addr, unsigned int order)
{
	if (addr != 0) {
		VM_BUG_ON(!virt_addr_valid((void *)addr));
		__free_pages(virt_to_page((void *)addr), order);
	}
}
EXPORT_SYMBOL(free_pages);

/**
 * alloc_pages_exact - allocate an exact number of physically-contiguous pages.
 * @size: the number of bytes to allocate
 * @gfp_mask: GFP flags for the allocation
 *
 * This function is similar to alloc_pages(), except that it allocates the
 * minimum number of pages to satisfy the request.  alloc_pages() can only
 * allocate memory in power-of-two pages.
 *
 * This function is also limited by MAX_ORDER.
 *
 * Memory allocated by this function must be released by free_pages_exact().
 */
void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
{
	unsigned int order = get_order(size);
	unsigned long addr;

	addr = __get_free_pages(gfp_mask, order);
	if (addr) {
		unsigned long alloc_end = addr + (PAGE_SIZE << order);
		unsigned long used = addr + PAGE_ALIGN(size);

		split_page(virt_to_page(addr), order);
		while (used < alloc_end) {
			free_page(used);
			used += PAGE_SIZE;
		}
	}

	return (void *)addr;
}
EXPORT_SYMBOL(alloc_pages_exact);
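alloc_pages_exact() buys a power-of-two block, splits it with split_page(), and immediately frees the tail pages it does not need, so the caller only holds PAGE_ALIGN(size) bytes. A hypothetical module-style caller (sketch only; the 10 KB size and the example_init/example_exit names are illustrative, not from this file):

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/errno.h>

static void *buf;

static int example_init(void)
{
	/* get_order(10 * 1024) is 2 (a 16 KB block on 4 KB pages),
	 * but alloc_pages_exact() frees the unused fourth page, so
	 * only 12 KB stay allocated. */
	buf = alloc_pages_exact(10 * 1024, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	return 0;
}

static void example_exit(void)
{
	/* Must pass the same size that was requested at allocation. */
	free_pages_exact(buf, 10 * 1024);
}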
/**
 * free_pages_exact - release memory allocated via alloc_pages_exact()
 * @virt: the value returned by alloc_pages_exact.
 * @size: size of allocation, same value as passed to alloc_pages_exact().
 *
 * Release the memory allocated by a previous call to alloc_pages_exact.
 */
void free_pages_exact(void *virt, size_t size)
{
	unsigned long addr = (unsigned long)virt;
	unsigned long end = addr + PAGE_ALIGN(size);

	while (addr < end) {
		free_page(addr);
		addr += PAGE_SIZE;
	}
}
EXPORT_SYMBOL(free_pages_exact);

static unsigned int nr_free_zone_pages(int offset)
{
	struct zoneref *z;
	struct zone *zone;

	/* Just pick one node, since fallback list is circular */
	unsigned int sum = 0;

	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);

	for_each_zone_zonelist(zone, z, zonelist, offset) {
		unsigned long size = zone->present_pages;
		unsigned long high = zone->pages_high;
		if (size > high)
			sum += size - high;
	}

	return sum;
}

/*
 * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
 */
unsigned int nr_free_buffer_pages(void)
{
	return nr_free_zone_pages(gfp_zone(GFP_USER));
}
EXPORT_SYMBOL_GPL(nr_free_buffer_pages);

/*
 * Amount of free RAM allocatable within all zones
 */
unsigned int nr_free_pagecache_pages(void)
{
	return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
}

static inline void show_node(struct zone *zone)
{
	if (NUMA_BUILD)
		printk("Node %d ", zone_to_nid(zone));
}

void si_meminfo(struct sysinfo *val)
{
	val->totalram = totalram_pages;
	val->sharedram = 0;
	val->freeram = global_page_state(NR_FREE_PAGES);
	val->bufferram = nr_blockdev_pages();
	val->totalhigh = totalhigh_pages;
	val->freehigh = nr_free_highpages();
	val->mem_unit = PAGE_SIZE;
}
EXPORT_SYMBOL(si_meminfo);

#ifdef CONFIG_NUMA
void si_meminfo_node(struct sysinfo *val, int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);

	val->totalram = pgdat->node_present_pages;
	val->freeram = node_page_state(nid, NR_FREE_PAGES);
#ifdef CONFIG_HIGHMEM
	val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
	val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
			NR_FREE_PAGES);
#else
	val->totalhigh = 0;
	val->freehigh = 0;
#endif
	val->mem_unit = PAGE_SIZE;
}
#endif

#define K(x) ((x) << (PAGE_SHIFT-10))

/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas(void)
{
	int cpu;
	struct zone *zone;

	for_each_zone(zone) {
		if (!populated_zone(zone))
			continue;

		show_node(zone);
		printk("%s per-cpu:\n", zone->name);

		for_each_online_cpu(cpu) {
			struct per_cpu_pageset *pageset;

			pageset = zone_pcp(zone, cpu);

			printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
			       cpu, pageset->pcp.high,
			       pageset->pcp.batch, pageset->pcp.count);
		}
	}

	printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
		" inactive_file:%lu"
//TODO: check/adjust line lengths
#ifdef CONFIG_UNEVICTABLE_LRU
		" unevictable:%lu"
#endif
		" dirty:%lu writeback:%lu unstable:%lu\n"
		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
		global_page_state(NR_ACTIVE_ANON),
		global_page_state(NR_ACTIVE_FILE),
		global_page_state(NR_INACTIVE_ANON),
		global_page_state(NR_INACTIVE_FILE),
#ifdef CONFIG_UNEVICTABLE_LRU
		global_page_state(NR_UNEVICTABLE),
#endif
		global_page_state(NR_FILE_DIRTY),
		global_page_state(NR_WRITEBACK),
		global_page_state(NR_UNSTABLE_NFS),
		global_page_state(NR_FREE_PAGES),
		global_page_state(NR_SLAB_RECLAIMABLE) +
			global_page_state(NR_SLAB_UNRECLAIMABLE),
		global_page_state(NR_FILE_MAPPED),
		global_page_state(NR_PAGETABLE),
		global_page_state(NR_BOUNCE));

	for_each_zone(zone) {
		int i;

		if (!populated_zone(zone))
			continue;

		show_node(zone);
		printk("%s"
			" free:%lukB"
			" min:%lukB"
			" low:%lukB"
			" high:%lukB"
			" active_anon:%lukB"
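One small detail in show_free_areas() worth unpacking is the K() macro used for those "%lukB" fields: shifting a page count left by PAGE_SHIFT - 10 multiplies it by the page size expressed in 1 KB units. A quick user-space check, assuming 4 KB pages (PAGE_SHIFT == 12 is an assumption for this sketch):

#include <stdio.h>

/* Same definition as in the listing, with PAGE_SHIFT assumed
 * to be 12 (4 KB pages) for this illustration. */
#define PAGE_SHIFT 12
#define K(x) ((x) << (PAGE_SHIFT - 10))

int main(void)
{
	/* 25 pages of 4 KB each -> 100 kB, matching the "%lukB"
	 * fields printed by show_free_areas(). */
	unsigned long pages = 25;
	printf("%lukB\n", K(pages)); /* prints 100kB */
	return 0;
}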