📄 page_alloc.c
字号:
" inactive_anon:%lukB"
			" active_file:%lukB"
			" inactive_file:%lukB"
#ifdef CONFIG_UNEVICTABLE_LRU
			" unevictable:%lukB"
#endif
			" present:%lukB"
			" pages_scanned:%lu"
			" all_unreclaimable? %s"
			"\n",
			zone->name,
			K(zone_page_state(zone, NR_FREE_PAGES)),
			K(zone->pages_min),
			K(zone->pages_low),
			K(zone->pages_high),
			K(zone_page_state(zone, NR_ACTIVE_ANON)),
			K(zone_page_state(zone, NR_INACTIVE_ANON)),
			K(zone_page_state(zone, NR_ACTIVE_FILE)),
			K(zone_page_state(zone, NR_INACTIVE_FILE)),
#ifdef CONFIG_UNEVICTABLE_LRU
			K(zone_page_state(zone, NR_UNEVICTABLE)),
#endif
			K(zone->present_pages),
			zone->pages_scanned,
			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
			);
		/* One value per zone index from this zone's lowmem_reserve[] array. */
		printk("lowmem_reserve[]:");
		for (i = 0; i < MAX_NR_ZONES; i++)
			printk(" %lu", zone->lowmem_reserve[i]);
		printk("\n");
	}

	/* Per-zone breakdown of free blocks by allocation order. */
	for_each_zone(zone) {
		unsigned long nr[MAX_ORDER], flags, order, total = 0;

		if (!populated_zone(zone))
			continue;

		show_node(zone);
		printk("%s: ", zone->name);

		/*
		 * Snapshot the per-order free counts while holding zone->lock;
		 * the printing happens after the lock is dropped.
		 */
		spin_lock_irqsave(&zone->lock, flags);
		for (order = 0; order < MAX_ORDER; order++) {
			nr[order] = zone->free_area[order].nr_free;
			total += nr[order] << order;
		}
		spin_unlock_irqrestore(&zone->lock, flags);
		for (order = 0; order < MAX_ORDER; order++)
			printk("%lu*%lukB ", nr[order], K(1UL) << order);
		printk("= %lukB\n", K(total));
	}

	printk("%ld total pagecache pages\n", global_page_state(NR_FILE_PAGES));

	show_swap_cache_info();
}

/*
 * Fill in a zoneref entry: store the zone pointer together with the
 * zone's index as reported by zone_idx().
 */
static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
{
	zoneref->zone = zone;
	zoneref->zone_idx = zone_idx(zone);
}

/*
 * Builds allocation fallback zone lists.
 *
 * Add all populated zones of a node to the zonelist.
*/static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int nr_zones, enum zone_type zone_type){ struct zone *zone; BUG_ON(zone_type >= MAX_NR_ZONES); zone_type++; do { zone_type--; zone = pgdat->node_zones + zone_type; if (populated_zone(zone)) { zoneref_set_zone(zone, &zonelist->_zonerefs[nr_zones++]); check_highest_zone(zone_type); } } while (zone_type); return nr_zones;}/* * zonelist_order: * 0 = automatic detection of better ordering. * 1 = order by ([node] distance, -zonetype) * 2 = order by (-zonetype, [node] distance) * * If not NUMA, ZONELIST_ORDER_ZONE and ZONELIST_ORDER_NODE will create * the same zonelist. So only NUMA can configure this param. */#define ZONELIST_ORDER_DEFAULT 0#define ZONELIST_ORDER_NODE 1#define ZONELIST_ORDER_ZONE 2/* zonelist order in the kernel. * set_zonelist_order() will set this to NODE or ZONE. */static int current_zonelist_order = ZONELIST_ORDER_DEFAULT;static char zonelist_order_name[3][8] = {"Default", "Node", "Zone"};#ifdef CONFIG_NUMA/* The value user specified ....changed by config */static int user_zonelist_order = ZONELIST_ORDER_DEFAULT;/* string for sysctl */#define NUMA_ZONELIST_ORDER_LEN 16char numa_zonelist_order[16] = "default";/* * interface for configure zonelist ordering. * command line option "numa_zonelist_order" * = "[dD]efault - default, automatic configuration. 
* = "[nN]ode - order by node locality, then by zone within node * = "[zZ]one - order by zone, then by locality within zone */static int __parse_numa_zonelist_order(char *s){ if (*s == 'd' || *s == 'D') { user_zonelist_order = ZONELIST_ORDER_DEFAULT; } else if (*s == 'n' || *s == 'N') { user_zonelist_order = ZONELIST_ORDER_NODE; } else if (*s == 'z' || *s == 'Z') { user_zonelist_order = ZONELIST_ORDER_ZONE; } else { printk(KERN_WARNING "Ignoring invalid numa_zonelist_order value: " "%s\n", s); return -EINVAL; } return 0;}static __init int setup_numa_zonelist_order(char *s){ if (s) return __parse_numa_zonelist_order(s); return 0;}early_param("numa_zonelist_order", setup_numa_zonelist_order);/* * sysctl handler for numa_zonelist_order */int numa_zonelist_order_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos){ char saved_string[NUMA_ZONELIST_ORDER_LEN]; int ret; if (write) strncpy(saved_string, (char*)table->data, NUMA_ZONELIST_ORDER_LEN); ret = proc_dostring(table, write, file, buffer, length, ppos); if (ret) return ret; if (write) { int oldval = user_zonelist_order; if (__parse_numa_zonelist_order((char*)table->data)) { /* * bogus value. restore saved string */ strncpy((char*)table->data, saved_string, NUMA_ZONELIST_ORDER_LEN); user_zonelist_order = oldval; } else if (oldval != user_zonelist_order) build_all_zonelists(); } return 0;}#define MAX_NODE_LOAD (num_online_nodes())static int node_load[MAX_NUMNODES];/** * find_next_best_node - find the next node that should appear in a given node's fallback list * @node: node whose fallback list we're appending * @used_node_mask: nodemask_t of already used nodes * * We use a number of factors to determine which is the next node that should * appear on a given node's fallback list. 
The node should not have appeared * already in @node's fallback list, and it should be the next closest node * according to the distance array (which contains arbitrary distance values * from each node to each node in the system), and should also prefer nodes * with no CPUs, since presumably they'll have very little allocation pressure * on them otherwise. * It returns -1 if no node is found. */static int find_next_best_node(int node, nodemask_t *used_node_mask){ int n, val; int min_val = INT_MAX; int best_node = -1; node_to_cpumask_ptr(tmp, 0); /* Use the local node if we haven't already */ if (!node_isset(node, *used_node_mask)) { node_set(node, *used_node_mask); return node; } for_each_node_state(n, N_HIGH_MEMORY) { /* Don't want a node to appear more than once */ if (node_isset(n, *used_node_mask)) continue; /* Use the distance array to find the distance */ val = node_distance(node, n); /* Penalize nodes under us ("prefer the next node") */ val += (n < node); /* Give preference to headless and unused nodes */ node_to_cpumask_ptr_next(tmp, n); if (!cpus_empty(*tmp)) val += PENALTY_FOR_NODE_WITH_CPUS; /* Slight preference for less loaded node */ val *= (MAX_NODE_LOAD*MAX_NUMNODES); val += node_load[n]; if (val < min_val) { min_val = val; best_node = n; } } if (best_node >= 0) node_set(best_node, *used_node_mask); return best_node;}/* * Build zonelists ordered by node and zones within node. * This results in maximum locality--normal zone overflows into local * DMA zone, if any--but risks exhausting DMA zone. 
*/static void build_zonelists_in_node_order(pg_data_t *pgdat, int node){ int j; struct zonelist *zonelist; zonelist = &pgdat->node_zonelists[0]; for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++) ; j = build_zonelists_node(NODE_DATA(node), zonelist, j, MAX_NR_ZONES - 1); zonelist->_zonerefs[j].zone = NULL; zonelist->_zonerefs[j].zone_idx = 0;}/* * Build gfp_thisnode zonelists */static void build_thisnode_zonelists(pg_data_t *pgdat){ int j; struct zonelist *zonelist; zonelist = &pgdat->node_zonelists[1]; j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1); zonelist->_zonerefs[j].zone = NULL; zonelist->_zonerefs[j].zone_idx = 0;}/* * Build zonelists ordered by zone and nodes within zones. * This results in conserving DMA zone[s] until all Normal memory is * exhausted, but results in overflowing to remote node while memory * may still exist in local DMA zone. */static int node_order[MAX_NUMNODES];static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes){ int pos, j, node; int zone_type; /* needs to be signed */ struct zone *z; struct zonelist *zonelist; zonelist = &pgdat->node_zonelists[0]; pos = 0; for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) { for (j = 0; j < nr_nodes; j++) { node = node_order[j]; z = &NODE_DATA(node)->node_zones[zone_type]; if (populated_zone(z)) { zoneref_set_zone(z, &zonelist->_zonerefs[pos++]); check_highest_zone(zone_type); } } } zonelist->_zonerefs[pos].zone = NULL; zonelist->_zonerefs[pos].zone_idx = 0;}static int default_zonelist_order(void){ int nid, zone_type; unsigned long low_kmem_size,total_size; struct zone *z; int average_size; /* * ZONE_DMA and ZONE_DMA32 can be very small area in the sytem. * If they are really small and used heavily, the system can fall * into OOM very easily. * This function detect ZONE_DMA/DMA32 size and confgigures zone order. */ /* Is there ZONE_NORMAL ? (ex. ppc has only DMA zone..) 
*/ low_kmem_size = 0; total_size = 0; for_each_online_node(nid) { for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) { z = &NODE_DATA(nid)->node_zones[zone_type]; if (populated_zone(z)) { if (zone_type < ZONE_NORMAL) low_kmem_size += z->present_pages; total_size += z->present_pages; } } } if (!low_kmem_size || /* there are no DMA area. */ low_kmem_size > total_size/2) /* DMA/DMA32 is big. */ return ZONELIST_ORDER_NODE; /* * look into each node's config. * If there is a node whose DMA/DMA32 memory is very big area on * local memory, NODE_ORDER may be suitable. */ average_size = total_size / (nodes_weight(node_states[N_HIGH_MEMORY]) + 1); for_each_online_node(nid) { low_kmem_size = 0; total_size = 0; for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) { z = &NODE_DATA(nid)->node_zones[zone_type]; if (populated_zone(z)) { if (zone_type < ZONE_NORMAL) low_kmem_size += z->present_pages; total_size += z->present_pages; } } if (low_kmem_size && total_size > average_size && /* ignore small node */ low_kmem_size > total_size * 70/100) return ZONELIST_ORDER_NODE; } return ZONELIST_ORDER_ZONE;}static void set_zonelist_order(void){ if (user_zonelist_order == ZONELIST_ORDER_DEFAULT) current_zonelist_order = default_zonelist_order(); else current_zonelist_order = user_zonelist_order;}static void build_zonelists(pg_data_t *pgdat){ int j, node, load; enum zone_type i; nodemask_t used_mask; int local_node, prev_node; struct zonelist *zonelist; int order = current_zonelist_order; /* initialize zonelists */ for (i = 0; i < MAX_ZONELISTS; i++) { zonelist = pgdat->node_zonelists + i; zonelist->_zonerefs[0].zone = NULL; zonelist->_zonerefs[0].zone_idx = 0; } /* NUMA-aware ordering of nodes */ local_node = pgdat->node_id; load = num_online_nodes(); prev_node = local_node; nodes_clear(used_mask); memset(node_load, 0, sizeof(node_load)); memset(node_order, 0, sizeof(node_order)); j = 0; while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { int distance = 
node_distance(local_node, node); /* * If another node is sufficiently far away then it is better * to reclaim pages in a zone before going off node. */ if (distance > RECLAIM_DISTANCE) zone_reclaim_mode = 1; /* * We don't want to pressure a particular node. * So adding penalty to the first node in same * distance group to make it round-robin. */ if (distance != node_distance(local_node, prev_node)) node_load[node] = load; prev_node = node; load--; if (order == ZONELIST_ORDER_NODE) build_zonelists_in_node_order(pgdat, node); else node_order[j++] = node; /* remember order */ } if (order == ZONELIST_ORDER_ZONE) { /* calculate node order -- i.e., DMA last! */ build_zonelists_in_zone_order(pgdat, j); } build_thisnode_zonelists(pgdat);}/* Construct the zonelist performance cache - see further mmzone.h */static void build_zonelist_cache(pg_data_t *pgdat){ struct zonelist *zonelist; struct zonelist_cache *zlc; struct zoneref *z; zonelist = &pgdat->node_zonelists[0]; zonelist->zlcache_ptr = zlc = &zonelist->zlcache; bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); for (z = zonelist->_zonerefs; z->zone; z++) zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);}#else /* CONFIG_NUMA */static void set_zonelist_order(void){ current_zonelist_order = ZONELIST_ORDER_ZONE;}static void build_zonelists(pg_data_t *pgdat){ int node, local_node; enum zone_type j; struct zonelist *zonelist; local_node = pgdat->node_id; zonelist = &pgdat->node_zonelists[0]; j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1); /* * Now we build the zonelist so that it contains the zones * of all the other nodes. 
* We don't want to pressure a particular node, so when * building the zones for node N, we make sure that the * zones coming right after the local ones are those from * node N+1 (modulo N) */ for (node = local_node + 1; node < MAX_NUMNODES; node++) { if (!node_online(node)) continue; j = build_zonelists_node(NODE_DATA(node), zonelist, j, MAX_NR_ZONES - 1); } for (node = 0; node < local_node; node++) { if (!node_on
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -