📄 numa.c

📁 linux-2.6.15.6
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * pSeries NUMA support
 *
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <asm/sparsemem.h>
#include <asm/lmb.h>
#include <asm/system.h>
#include <asm/smp.h>

/* Non-zero when NUMA handling is enabled; checked by the CPU notifier. */
static int numa_enabled = 1;

/* Non-zero turns on the dbg() messages. */
static int numa_debug;

/*
 * Emit a KERN_INFO message only when numa_debug is set.
 *
 * The body is wrapped in do { } while (0) so that dbg(...); is exactly one
 * statement: a bare "if (...) { }" expansion would silently capture an
 * "else" that follows an unbraced use of the macro.
 */
#define dbg(args...) \
	do { \
		if (numa_debug) \
			printk(KERN_INFO args); \
	} while (0)

/* Node id recorded for each cpu (written by map_cpu_to_node()). */
int numa_cpu_lookup_table[NR_CPUS];

/* For each node, the mask of cpus that have been mapped to it. */
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];

/* Per-node pglist_data pointers. */
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);

/* Per-node bootmem bookkeeping, only needed during init. */
static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];

/*
 * Index into an "ibm,associativity" property that selects the NUMA domain;
 * -1 means no usable NUMA information (every lookup then yields node 0).
 */
static int min_common_depth;

/*
 * We need somewhere to store start/end/node for each region until we have
 * allocated the real node_data structures.
*/#define MAX_REGIONS	(MAX_LMB_REGIONS*2)static struct {	unsigned long start_pfn;	unsigned long end_pfn;	int nid;} init_node_data[MAX_REGIONS] __initdata;int __init early_pfn_to_nid(unsigned long pfn){	unsigned int i;	for (i = 0; init_node_data[i].end_pfn; i++) {		unsigned long start_pfn = init_node_data[i].start_pfn;		unsigned long end_pfn = init_node_data[i].end_pfn;		if ((start_pfn <= pfn) && (pfn < end_pfn))			return init_node_data[i].nid;	}	return -1;}void __init add_region(unsigned int nid, unsigned long start_pfn,		       unsigned long pages){	unsigned int i;	dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",		nid, start_pfn, pages);	for (i = 0; init_node_data[i].end_pfn; i++) {		if (init_node_data[i].nid != nid)			continue;		if (init_node_data[i].end_pfn == start_pfn) {			init_node_data[i].end_pfn += pages;			return;		}		if (init_node_data[i].start_pfn == (start_pfn + pages)) {			init_node_data[i].start_pfn -= pages;			return;		}	}	/*	 * Leave last entry NULL so we dont iterate off the end (we use	 * entry.end_pfn to terminate the walk).	 
*/	if (i >= (MAX_REGIONS - 1)) {		printk(KERN_ERR "WARNING: too many memory regions in "				"numa code, truncating\n");		return;	}	init_node_data[i].start_pfn = start_pfn;	init_node_data[i].end_pfn = start_pfn + pages;	init_node_data[i].nid = nid;}/* We assume init_node_data has no overlapping regions */void __init get_region(unsigned int nid, unsigned long *start_pfn,		       unsigned long *end_pfn, unsigned long *pages_present){	unsigned int i;	*start_pfn = -1UL;	*end_pfn = *pages_present = 0;	for (i = 0; init_node_data[i].end_pfn; i++) {		if (init_node_data[i].nid != nid)			continue;		*pages_present += init_node_data[i].end_pfn -			init_node_data[i].start_pfn;		if (init_node_data[i].start_pfn < *start_pfn)			*start_pfn = init_node_data[i].start_pfn;		if (init_node_data[i].end_pfn > *end_pfn)			*end_pfn = init_node_data[i].end_pfn;	}	/* We didnt find a matching region, return start/end as 0 */	if (*start_pfn == -1UL)		*start_pfn = 0;}static inline void map_cpu_to_node(int cpu, int node){	numa_cpu_lookup_table[cpu] = node;	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))		cpu_set(cpu, numa_cpumask_lookup_table[node]);}#ifdef CONFIG_HOTPLUG_CPUstatic void unmap_cpu_from_node(unsigned long cpu){	int node = numa_cpu_lookup_table[cpu];	dbg("removing cpu %lu from node %d\n", cpu, node);	if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {		cpu_clear(cpu, numa_cpumask_lookup_table[node]);	} else {		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",		       cpu, node);	}}#endif /* CONFIG_HOTPLUG_CPU */static struct device_node *find_cpu_node(unsigned int cpu){	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);	struct device_node *cpu_node = NULL;	unsigned int *interrupt_server, *reg;	int len;	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {		/* Try interrupt server first */		interrupt_server = (unsigned int *)get_property(cpu_node,					"ibm,ppc-interrupt-server#s", &len);		len = len / sizeof(u32);		if (interrupt_server && (len > 
0)) {			while (len--) {				if (interrupt_server[len] == hw_cpuid)					return cpu_node;			}		} else {			reg = (unsigned int *)get_property(cpu_node,							   "reg", &len);			if (reg && (len > 0) && (reg[0] == hw_cpuid))				return cpu_node;		}	}	return NULL;}/* must hold reference to node during call */static int *of_get_associativity(struct device_node *dev){	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);}static int of_node_numa_domain(struct device_node *device){	int numa_domain;	unsigned int *tmp;	if (min_common_depth == -1)		return 0;	tmp = of_get_associativity(device);	if (tmp && (tmp[0] >= min_common_depth)) {		numa_domain = tmp[min_common_depth];	} else {		dbg("WARNING: no NUMA information for %s\n",		    device->full_name);		numa_domain = 0;	}	return numa_domain;}/* * In theory, the "ibm,associativity" property may contain multiple * associativity lists because a resource may be multiply connected * into the machine.  This resource then has different associativity * characteristics relative to its multiple connections.  We ignore * this for now.  We also assume that all cpu and memory sets have * their distances represented at a common level.  This won't be * true for heirarchical NUMA. * * In any case the ibm,associativity-reference-points should give * the correct depth for a normal NUMA system. * * - Dave Hansen <haveblue@us.ibm.com> */static int __init find_min_common_depth(void){	int depth;	unsigned int *ref_points;	struct device_node *rtas_root;	unsigned int len;	rtas_root = of_find_node_by_path("/rtas");	if (!rtas_root)		return -1;	/*	 * this property is 2 32-bit integers, each representing a level of	 * depth in the associativity nodes.  The first is for an SMP	 * configuration (should be all 0's) and the second is for a normal	 * NUMA configuration.	 
*/	ref_points = (unsigned int *)get_property(rtas_root,			"ibm,associativity-reference-points", &len);	if ((len >= 1) && ref_points) {		depth = ref_points[1];	} else {		dbg("WARNING: could not find NUMA "		    "associativity reference point\n");		depth = -1;	}	of_node_put(rtas_root);	return depth;}static int __init get_mem_addr_cells(void){	struct device_node *memory = NULL;	int rc;	memory = of_find_node_by_type(memory, "memory");	if (!memory)		return 0; /* it won't matter */	rc = prom_n_addr_cells(memory);	return rc;}static int __init get_mem_size_cells(void){	struct device_node *memory = NULL;	int rc;	memory = of_find_node_by_type(memory, "memory");	if (!memory)		return 0; /* it won't matter */	rc = prom_n_size_cells(memory);	return rc;}static unsigned long __init read_n_cells(int n, unsigned int **buf){	unsigned long result = 0;	while (n--) {		result = (result << 32) | **buf;		(*buf)++;	}	return result;}/* * Figure out to which domain a cpu belongs and stick it there. * Return the id of the domain used. */static int numa_setup_cpu(unsigned long lcpu){	int numa_domain = 0;	struct device_node *cpu = find_cpu_node(lcpu);	if (!cpu) {		WARN_ON(1);		goto out;	}	numa_domain = of_node_numa_domain(cpu);	if (numa_domain >= num_online_nodes()) {		/*		 * POWER4 LPAR uses 0xffff as invalid node,		 * dont warn in this case.		 
*/		if (numa_domain != 0xffff)			printk(KERN_ERR "WARNING: cpu %ld "			       "maps to invalid NUMA node %d\n",			       lcpu, numa_domain);		numa_domain = 0;	}out:	node_set_online(numa_domain);	map_cpu_to_node(lcpu, numa_domain);	of_node_put(cpu);	return numa_domain;}static int cpu_numa_callback(struct notifier_block *nfb,			     unsigned long action,			     void *hcpu){	unsigned long lcpu = (unsigned long)hcpu;	int ret = NOTIFY_DONE;	switch (action) {	case CPU_UP_PREPARE:		if (min_common_depth == -1 || !numa_enabled)			map_cpu_to_node(lcpu, 0);		else			numa_setup_cpu(lcpu);		ret = NOTIFY_OK;		break;#ifdef CONFIG_HOTPLUG_CPU	case CPU_DEAD:	case CPU_UP_CANCELED:		unmap_cpu_from_node(lcpu);		break;		ret = NOTIFY_OK;#endif	}	return ret;}/* * Check and possibly modify a memory region to enforce the memory limit. * * Returns the size the region should have to enforce the memory limit. * This will either be the original value of size, a truncated value, * or zero. If the returned value of size is 0 the region should be * discarded as it lies wholy above the memory limit. */static unsigned long __init numa_enforce_memory_limit(unsigned long start,						      unsigned long size){	/*
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -