📄 vmscan.c

📁 最新最稳定的Linux内存管理模块源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
				continue;			if (nr_slab == 0 && zone->pages_scanned >=						(zone_lru_pages(zone) * 6))					zone_set_flag(zone,						      ZONE_ALL_UNRECLAIMABLE);			/*			 * If we've done a decent amount of scanning and			 * the reclaim ratio is low, start doing writepage			 * even in laptop mode			 */			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)				sc.may_writepage = 1;		}		if (all_zones_ok)			break;		/* kswapd: all done */		/*		 * OK, kswapd is getting into trouble.  Take a nap, then take		 * another pass across the zones.		 */		if (total_scanned && priority < DEF_PRIORITY - 2)			congestion_wait(WRITE, HZ/10);		/*		 * We do this so kswapd doesn't build up large priorities for		 * example when it is freeing in parallel with allocators. It		 * matches the direct reclaim path behaviour in terms of impact		 * on zone->*_priority.		 */		if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)			break;	}out:	/*	 * Note within each zone the priority level at which this zone was	 * brought into a happy state.  So that the next thread which scans this	 * zone will start out at that priority level.	 */	for (i = 0; i < pgdat->nr_zones; i++) {		struct zone *zone = pgdat->node_zones + i;		zone->prev_priority = temp_priority[i];	}	if (!all_zones_ok) {		cond_resched();		try_to_freeze();		/*		 * Fragmentation may mean that the system cannot be		 * rebalanced for high-order allocations in all zones.		 * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX,		 * it means the zones have been fully scanned and are still		 * not balanced. For high-order allocations, there is		 * little point trying all over again as kswapd may		 * infinite loop.		 *		 * Instead, recheck all watermarks at order-0 as they		 * are the most important. If watermarks are ok, kswapd will go		 * back to sleep. High-order users can still perform direct		 * reclaim if they wish.		 
*/
		if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
			order = sc.order = 0;

		goto loop_again;
	}

	return sc.nr_reclaimed;
}

/*
 * The background pageout daemon, started as a kernel thread
 * from the init process.
 *
 * This basically trickles out pages so that we have _some_
 * free memory available even if there is no other activity
 * that frees anything up. This is needed for things like routing
 * etc, where we otherwise might have all activity going on in
 * asynchronous contexts that cannot page things out.
 *
 * If there are applications that are active memory-allocators
 * (most normal use), this basically shouldn't matter.
 *
 * @p: the node descriptor (pg_data_t) this thread serves, passed as an
 *     opaque thread argument.
 *
 * The main loop has no exit, so the trailing "return 0" is never reached.
 */
static int kswapd(void *p)
{
	unsigned long order;
	pg_data_t *pgdat = (pg_data_t*)p;
	struct task_struct *tsk = current;
	DEFINE_WAIT(wait);
	struct reclaim_state reclaim_state = {
		.reclaimed_slab = 0,
	};
	node_to_cpumask_ptr(cpumask, pgdat->node_id);

	/* Bind to the node's CPUs, unless the node has none. */
	if (!cpumask_empty(cpumask))
		set_cpus_allowed_ptr(tsk, cpumask);
	current->reclaim_state = &reclaim_state;

	/*
	 * Tell the memory management that we're a "memory allocator",
	 * and that if we need more memory we should get access to it
	 * regardless (see "__alloc_pages()"). "kswapd" should
	 * never get caught in the normal page freeing logic.
	 *
	 * (Kswapd normally doesn't need memory anyway, but sometimes
	 * you need a small amount of memory in order to be able to
	 * page out something else, and this flag essentially protects
	 * us from recursively trying to free more memory as we're
	 * trying to free the first piece of memory in the first place).
	 */
	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
	set_freezable();

	order = 0;
	for ( ; ; ) {
		unsigned long new_order;

		/* Queue on the wait queue before sampling the request. */
		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
		/* Fetch the pending order and clear it for the next requester. */
		new_order = pgdat->kswapd_max_order;
		pgdat->kswapd_max_order = 0;
		if (order < new_order) {
			/*
			 * Don't sleep if someone wants a larger 'order'
			 * allocation
			 */
			order = new_order;
		} else {
			if (!freezing(current))
				schedule();

			/*
			 * Use whatever order is recorded in the node now
			 * (wakeup_kswapd() raises kswapd_max_order).
			 */
			order = pgdat->kswapd_max_order;
		}
		finish_wait(&pgdat->kswapd_wait, &wait);

		if (!try_to_freeze()) {
			/* We can speed up thawing tasks if we don't call
			 * balance_pgdat after returning from the refrigerator
			 */
			balance_pgdat(pgdat, order);
		}
	}
	return 0;
}

/*
 * A zone is low on free memory, so wake its kswapd task to service it.
 *
 * @zone:  zone that triggered the request.
 * @order: allocation order the caller is interested in.
 *
 * Returns without waking anybody when the zone is unpopulated, when
 * zone_watermark_ok() is satisfied against zone->pages_low, when
 * cpuset_zone_allowed_hardwall() rejects the zone for GFP_KERNEL, or when
 * nothing is waiting on the node's kswapd_wait queue.
 */
void wakeup_kswapd(struct zone *zone, int order)
{
	pg_data_t *pgdat;

	if (!populated_zone(zone))
		return;

	pgdat = zone->zone_pgdat;
	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
		return;
	/*
	 * Record the largest order requested so far. Note that this happens
	 * before the cpuset and wait-queue checks below.
	 */
	if (pgdat->kswapd_max_order < order)
		pgdat->kswapd_max_order = order;
	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
		return;
	if (!waitqueue_active(&pgdat->kswapd_wait))
		return;
	wake_up_interruptible(&pgdat->kswapd_wait);
}

/*
 * Sum of the global active/inactive, anon/file LRU page counters.
 */
unsigned long global_lru_pages(void)
{
	return global_page_state(NR_ACTIVE_ANON)
		+ global_page_state(NR_ACTIVE_FILE)
		+ global_page_state(NR_INACTIVE_ANON)
		+ global_page_state(NR_INACTIVE_FILE);
}

#ifdef CONFIG_PM
/*
 * Helper function for shrink_all_memory().
Tries to reclaim 'nr_pages' pages * from LRU lists system-wide, for given pass and priority, and returns the * number of reclaimed pages * * For pass > 3 we also try to shrink the LRU lists that contain a few pages */static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,				      int pass, struct scan_control *sc){	struct zone *zone;	unsigned long ret = 0;	for_each_zone(zone) {		enum lru_list l;		if (!populated_zone(zone))			continue;		if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)			continue;		for_each_evictable_lru(l) {			enum zone_stat_item ls = NR_LRU_BASE + l;			unsigned long lru_pages = zone_page_state(zone, ls);			/* For pass = 0, we don't shrink the active list */			if (pass == 0 && (l == LRU_ACTIVE_ANON ||						l == LRU_ACTIVE_FILE))				continue;			zone->lru[l].nr_scan += (lru_pages >> prio) + 1;			if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {				unsigned long nr_to_scan;				zone->lru[l].nr_scan = 0;				nr_to_scan = min(nr_pages, lru_pages);				ret += shrink_list(l, nr_to_scan, zone,								sc, prio);				if (ret >= nr_pages)					return ret;			}		}	}	return ret;}/* * Try to free `nr_pages' of memory, system-wide, and return the number of * freed pages. 
* * Rather than trying to age LRUs the aim is to preserve the overall * LRU order by reclaiming preferentially * inactive > active > active referenced > active mapped */unsigned long shrink_all_memory(unsigned long nr_pages){	unsigned long lru_pages, nr_slab;	unsigned long ret = 0;	int pass;	struct reclaim_state reclaim_state;	struct scan_control sc = {		.gfp_mask = GFP_KERNEL,		.may_swap = 0,		.swap_cluster_max = nr_pages,		.may_writepage = 1,		.isolate_pages = isolate_pages_global,	};	current->reclaim_state = &reclaim_state;	lru_pages = global_lru_pages();	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);	/* If slab caches are huge, it's better to hit them first */	while (nr_slab >= lru_pages) {		reclaim_state.reclaimed_slab = 0;		shrink_slab(nr_pages, sc.gfp_mask, lru_pages);		if (!reclaim_state.reclaimed_slab)			break;		ret += reclaim_state.reclaimed_slab;		if (ret >= nr_pages)			goto out;		nr_slab -= reclaim_state.reclaimed_slab;	}	/*	 * We try to shrink LRUs in 5 passes:	 * 0 = Reclaim from inactive_list only	 * 1 = Reclaim from active list but don't reclaim mapped	 * 2 = 2nd pass of type 1	 * 3 = Reclaim mapped (normal reclaim)	 * 4 = 2nd pass of type 3	 */	for (pass = 0; pass < 5; pass++) {		int prio;		/* Force reclaiming mapped pages in the passes #3 and #4 */		if (pass > 2)			sc.may_swap = 1;		for (prio = DEF_PRIORITY; prio >= 0; prio--) {			unsigned long nr_to_scan = nr_pages - ret;			sc.nr_scanned = 0;			ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);			if (ret >= nr_pages)				goto out;			reclaim_state.reclaimed_slab = 0;			shrink_slab(sc.nr_scanned, sc.gfp_mask,					global_lru_pages());			ret += reclaim_state.reclaimed_slab;			if (ret >= nr_pages)				goto out;			if (sc.nr_scanned && prio < DEF_PRIORITY - 2)				congestion_wait(WRITE, HZ / 10);		}	}	/*	 * If ret = 0, we could not shrink LRUs, but there may be something	 * in slab caches	 */	if (!ret) {		do {			reclaim_state.reclaimed_slab = 0;			shrink_slab(nr_pages, sc.gfp_mask, 
global_lru_pages());			ret += reclaim_state.reclaimed_slab;		} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);	}out:	current->reclaim_state = NULL;	return ret;}#endif/* It's optimal to keep kswapds on the same CPUs as their memory, but   not required for correctness.  So if the last cpu in a node goes   away, we get changed to run anywhere: as the first one comes back,   restore their cpu bindings. */static int __devinit cpu_callback(struct notifier_block *nfb,				  unsigned long action, void *hcpu){	int nid;	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {		for_each_node_state(nid, N_HIGH_MEMORY) {			pg_data_t *pgdat = NODE_DATA(nid);			node_to_cpumask_ptr(mask, pgdat->node_id);			if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)				/* One of our CPUs online: restore mask */				set_cpus_allowed_ptr(pgdat->kswapd, mask);		}	}	return NOTIFY_OK;}/* * This kswapd start function will be called by init and node-hot-add. * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added. */int kswapd_run(int nid){	pg_data_t *pgdat = NODE_DATA(nid);	int ret = 0;	if (pgdat->kswapd)		return 0;	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);	if (IS_ERR(pgdat->kswapd)) {		/* failure at boot is fatal */		BUG_ON(system_state == SYSTEM_BOOTING);		printk("Failed to start kswapd on node %d\n",nid);		ret = -1;	}	return ret;}static int __init kswapd_init(void){	int nid;	swap_setup();	for_each_node_state(nid, N_HIGH_MEMORY) 		kswapd_run(nid);	hotcpu_notifier(cpu_callback, 0);	return 0;}module_init(kswapd_init)#ifdef CONFIG_NUMA/* * Zone reclaim mode * * If non-zero call zone_reclaim when the number of free pages falls below * the watermarks. */int zone_reclaim_mode __read_mostly;#define RECLAIM_OFF 0#define RECLAIM_ZONE (1<<0)	/* Run shrink_inactive_list on the zone */#define RECLAIM_WRITE (1<<1)	/* Writeout pages during reclaim */#define RECLAIM_SWAP (1<<2)	/* Swap pages out during reclaim *//* * Priority for ZONE_RECLAIM. 
This determines the fraction of pages * of a node considered for each zone_reclaim. 4 scans 1/16th of * a zone. */#define ZONE_RECLAIM_PRIORITY 4/* * Percentage of pages in a zone that must be unmapped for zone_reclaim to * occur. */int sysctl_min_unmapped_ratio = 1;/* * If the number of slab pages in a zone grows beyond this percentage then * slab reclaim needs to occur. */int sysctl_min_slab_ratio = 5;/* * Try to free up some pages from this zone through reclaim. */static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order){	/* Minimum pages needed in order to stay on node */	const unsigned long nr_pages = 1 << order;	struct task_struct *p = current;	struct reclaim_state reclaim_state;	int priority;	struct scan_control sc = {		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),		.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP),		.swap_cluster_max = max_t(unsigned long, nr_pages,					SWAP_CLUSTER_MAX),		.gfp_mask = gfp_mask,		.swappiness = vm_swappiness,		.isolate_pages = isolate_pages_global,	};	unsigned long slab_reclaimable;	disable_swap_token();	cond_resched();	/*	 * We need to be able to allocate from the reserves for RECLAIM_SWAP	 * and we also need to be able to write out pages for RECLAIM_WRITE	 * and RECLAIM_SWAP.	 */	p->flags |= PF_MEMALLOC | PF_SWAPWRITE;	reclaim_state.reclaimed_slab = 0;	p->reclaim_state = &reclaim_state;	if (zone_page_state(zone, NR_FILE_PAGES) -		zone_page_state(zone, NR_FILE_MAPPED) >		zone->min_unmapped_pages) {		/*		 * Free memory by calling shrink zone with increasing		 * priorities until we have enough memory freed.		 
*/		priority = ZONE_RECLAIM_PRIORITY;		do {			note_zone_scanning_priority(zone, priority);			shrink_zone(priority, zone, &sc);			priority--;		} while (priority >= 0 && sc.nr_reclaimed < nr_pages);	}	slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);	if (slab_reclaimable > zone->min_slab_pages) {		/*		 * shrink_slab() does not currently allow us to determine how		 * many pages were freed in this zone. So we take the current		 * number of slab pages and shake the slab until it is reduced		 * by the same nr_pages that we used for reclaiming unmapped		 * pages.		 *		 * Note that shrink_slab will free memory on all zones and may		 * take a
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -