📄 io_apic.c
/*
 *	Intel IO-APIC support for multi-Pentium hosts.
 *
 *	Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
 *
 *	Many thanks to Stig Venaas for trying out countless experimental
 *	patches and reporting/debugging problems patiently!
 *
 *	(c) 1999, Multiple IO-APIC support, developed by
 *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
 *	Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
 *	further tested and cleaned up by Zach Brown <zab@redhat.com>
 *	and Ingo Molnar <mingo@redhat.com>
 *
 *	Fixes
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
 *					thanks to Eric Gilmore
 *					and Rolf G. Tews
 *					for testing these extensively
 *	Paul Diefenbaugh	:	Added full ACPI support
 */

#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/config.h>
#include <linux/smp_lock.h>
#include <linux/mc146818rtc.h>
#include <linux/compiler.h>
#include <linux/acpi.h>

#include <asm/io.h>
#include <asm/smp.h>
#include <asm/desc.h>
#include <asm/timer.h>

#include <mach_apic.h>

#include "io_ports.h"

static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;

/*
 * Is the SiS APIC rmw bug present ?
 * -1 = don't know, 0 = no, 1 = yes
 */
int sis_apic_bug = -1;

/*
 * # of IRQ routing registers
 */
int nr_ioapic_registers[MAX_IO_APICS];

/*
 * Rough estimation of how many shared IRQs there are, can
 * be changed anytime.
 */
#define MAX_PLUS_SHARED_IRQS NR_IRQS
#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)

/*
 * This is performance-critical, we want to do it O(1)
 *
 * the indexing order of this array favors 1:1 mappings
 * between pins and IRQs.
 */
static struct irq_pin_list {
	int apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];

int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};

#ifdef CONFIG_PCI_MSI
#define vector_to_irq(vector)	\
	(platform_legacy_irq(vector) ? vector : vector_irq[vector])
#else
#define vector_to_irq(vector) (vector)
#endif

/*
 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
 * shared ISA-space IRQs, so we have to support them. We are super
 * fast in the common case, and fast for shared ISA-space IRQs.
 */
static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
{
	static int first_free_entry = NR_IRQS;
	struct irq_pin_list *entry = irq_2_pin + irq;

	while (entry->next)
		entry = irq_2_pin + entry->next;

	if (entry->pin != -1) {
		entry->next = first_free_entry;
		entry = irq_2_pin + entry->next;
		if (++first_free_entry >= PIN_MAP_SIZE)
			panic("io_apic.c: whoops");
	}
	entry->apic = apic;
	entry->pin = pin;
}
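/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): how the irq_2_pin table is walked.  The first NR_IRQS slots
 * are indexed directly by IRQ number; overflow entries for shared IRQs
 * live above NR_IRQS and are chained through ->next.  The hypothetical
 * helper below mirrors the traversal pattern used by
 * __modify_IO_APIC_irq() and set_ioapic_affinity_irq() further down.
 */
#if 0	/* example only, never compiled */
static void for_each_irq_pin_example(unsigned int irq)
{
	struct irq_pin_list *entry = irq_2_pin + irq;

	for (;;) {
		if (entry->pin == -1)		/* no pin assigned to this IRQ */
			break;
		/* ... act on the (entry->apic, entry->pin) pair here ... */
		if (!entry->next)		/* 0 terminates the chain */
			break;
		entry = irq_2_pin + entry->next;
	}
}
#endif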
/*
 * Reroute an IRQ to a different pin.
 */
static void __init replace_pin_at_irq(unsigned int irq,
				      int oldapic, int oldpin,
				      int newapic, int newpin)
{
	struct irq_pin_list *entry = irq_2_pin + irq;

	while (1) {
		if (entry->apic == oldapic && entry->pin == oldpin) {
			entry->apic = newapic;
			entry->pin = newpin;
		}
		if (!entry->next)
			break;
		entry = irq_2_pin + entry->next;
	}
}

static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
{
	struct irq_pin_list *entry = irq_2_pin + irq;
	unsigned int pin, reg;

	for (;;) {
		pin = entry->pin;
		if (pin == -1)
			break;
		reg = io_apic_read(entry->apic, 0x10 + pin*2);
		reg &= ~disable;
		reg |= enable;
		io_apic_modify(entry->apic, 0x10 + pin*2, reg);
		if (!entry->next)
			break;
		entry = irq_2_pin + entry->next;
	}
}

/* mask = 1 */
static void __mask_IO_APIC_irq (unsigned int irq)
{
	__modify_IO_APIC_irq(irq, 0x00010000, 0);
}

/* mask = 0 */
static void __unmask_IO_APIC_irq (unsigned int irq)
{
	__modify_IO_APIC_irq(irq, 0, 0x00010000);
}

/* mask = 1, trigger = 0 */
static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
{
	__modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
}

/* mask = 0, trigger = 1 */
static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
{
	__modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
}

static void mask_IO_APIC_irq (unsigned int irq)
{
	unsigned long flags;

	spin_lock_irqsave(&ioapic_lock, flags);
	__mask_IO_APIC_irq(irq);
	spin_unlock_irqrestore(&ioapic_lock, flags);
}

static void unmask_IO_APIC_irq (unsigned int irq)
{
	unsigned long flags;

	spin_lock_irqsave(&ioapic_lock, flags);
	__unmask_IO_APIC_irq(irq);
	spin_unlock_irqrestore(&ioapic_lock, flags);
}

void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
	struct IO_APIC_route_entry entry;
	unsigned long flags;

	/* Check delivery_mode to be sure we're not clearing an SMI pin */
	spin_lock_irqsave(&ioapic_lock, flags);
	*(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
	*(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
	spin_unlock_irqrestore(&ioapic_lock, flags);
	if (entry.delivery_mode == dest_SMI)
		return;

	/*
	 * Disable it in the IO-APIC irq-routing table:
	 */
	memset(&entry, 0, sizeof(entry));
	entry.mask = 1;
	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
	spin_unlock_irqrestore(&ioapic_lock, flags);
}

static void clear_IO_APIC (void)
{
	int apic, pin;

	for (apic = 0; apic < nr_ioapics; apic++)
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
			clear_IO_APIC_pin(apic, pin);
}

static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
{
	unsigned long flags;
	int pin;
	struct irq_pin_list *entry = irq_2_pin + irq;
	unsigned int apicid_value;

	apicid_value = cpu_mask_to_apicid(cpumask);
	/* Prepare to do the io_apic_write */
	apicid_value = apicid_value << 24;
	spin_lock_irqsave(&ioapic_lock, flags);
	for (;;) {
		pin = entry->pin;
		if (pin == -1)
			break;
		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
		if (!entry->next)
			break;
		entry = irq_2_pin + entry->next;
	}
	spin_unlock_irqrestore(&ioapic_lock, flags);
}
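/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): the register indices and bit positions behind the magic
 * numbers used above.  Each IO-APIC redirection table entry is 64 bits
 * wide, split across two 32-bit registers; the names below are
 * hypothetical and exist only to label the constants.
 */
#if 0	/* example only, never compiled */
#define REDIR_LOW(pin)		(0x10 + (pin) * 2)	/* vector, delivery mode, trigger, mask */
#define REDIR_HIGH(pin)		(0x11 + (pin) * 2)	/* destination APIC ID in bits 24-31 */
#define REDIR_MASKED		0x00010000		/* bit 16: interrupt masked */
#define REDIR_LEVEL_TRIGGER	0x00008000		/* bit 15: level-triggered (0 = edge) */

/*
 * So __unmask_and_level_IO_APIC_irq() sets bit 15 and clears bit 16,
 * and set_ioapic_affinity_irq() writes (apicid << 24) into the high
 * register of every pin routed to the IRQ.
 */
#endif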
#if defined(CONFIG_IRQBALANCE)
# include <asm/processor.h>	/* kernel_thread() */
# include <linux/kernel_stat.h>	/* kstat */
# include <linux/slab.h>	/* kmalloc() */
# include <linux/timer.h>	/* time_after() */

# ifdef CONFIG_BALANCED_IRQ_DEBUG
#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
#  define Dprintk(x...) do { TDprintk(x); } while (0)
# else
#  define TDprintk(x...)
#  define Dprintk(x...)
# endif

extern cpumask_t irq_affinity[NR_IRQS];

cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];

#define IRQBALANCE_CHECK_ARCH -999
static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
static int physical_balance = 0;

struct irq_cpu_info {
	unsigned long * last_irq;
	unsigned long * irq_delta;
	unsigned long irq;
} irq_cpu_data[NR_CPUS];

#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
#define LAST_CPU_IRQ(cpu,irq)	(irq_cpu_data[cpu].last_irq[irq])
#define IRQ_DELTA(cpu,irq)	(irq_cpu_data[cpu].irq_delta[irq])

#define IDLE_ENOUGH(cpu,now) \
	(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))

#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)

#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))

#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
#define BALANCED_IRQ_LESS_DELTA		(HZ)

long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;

static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
			unsigned long now, int direction)
{
	int search_idle = 1;
	int cpu = curr_cpu;

	goto inside;

	do {
		if (unlikely(cpu == curr_cpu))
			search_idle = 0;
inside:
		if (direction == 1) {
			cpu++;
			if (cpu >= NR_CPUS)
				cpu = 0;
		} else {
			cpu--;
			if (cpu == -1)
				cpu = NR_CPUS-1;
		}
	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
			(search_idle && !IDLE_ENOUGH(cpu,now)));

	return cpu;
}

static inline void balance_irq(int cpu, int irq)
{
	unsigned long now = jiffies;
	cpumask_t allowed_mask;
	unsigned int new_cpu;

	if (irqbalance_disabled)
		return;

	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
	new_cpu = move(cpu, allowed_mask, now, 1);
	if (cpu != new_cpu) {
		irq_desc_t *desc = irq_desc + irq;
		unsigned long flags;

		spin_lock_irqsave(&desc->lock, flags);
		pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
		spin_unlock_irqrestore(&desc->lock, flags);
	}
}

static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
{
	int i, j;
	Dprintk("Rotating IRQs among CPUs.\n");
	for (i = 0; i < NR_CPUS; i++) {
		for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
			if (!irq_desc[j].action)
				continue;
			/* Is it a significant load ?  */
			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
						useful_load_threshold)
				continue;
			balance_irq(i, j);
		}
	}
	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
	return;
}
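/*
 * Editor's note (not part of the original source): balance_irq() does
 * not reprogram the IO-APIC directly; under the descriptor lock it only
 * records the chosen target in pending_irq_balance_cpumask[], which is
 * presumably applied later by the affinity-setting path.  For a sense
 * of scale, assuming HZ == 1000: balanced_irq_interval starts at
 * MAX_BALANCED_IRQ_INTERVAL = 5000 jiffies (5 s), each rotation pass
 * shortens it by BALANCED_IRQ_LESS_DELTA = 1000 jiffies down to the
 * MIN_BALANCED_IRQ_INTERVAL = 500 jiffy (0.5 s) floor, and
 * BALANCED_IRQ_MORE_DELTA = 100 jiffies is the matching step for
 * lengthening it again.
 */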
static void do_irq_balance(void)
{
	int i, j;
	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
	unsigned long move_this_load = 0;
	int max_loaded = 0, min_loaded = 0;
	int load;
	unsigned long useful_load_threshold = balanced_irq_interval + 10;
	int selected_irq;
	int tmp_loaded, first_attempt = 1;
	unsigned long tmp_cpu_irq;
	unsigned long imbalance = 0;
	cpumask_t allowed_mask, target_cpu_mask, tmp;

	for (i = 0; i < NR_CPUS; i++) {
		int package_index;
		CPU_IRQ(i) = 0;
		if (!cpu_online(i))
			continue;
		package_index = CPU_TO_PACKAGEINDEX(i);
		for (j = 0; j < NR_IRQS; j++) {
			unsigned long value_now, delta;
			/* Is this an active IRQ? */
			if (!irq_desc[j].action)
				continue;
			if ( package_index == i )
				IRQ_DELTA(package_index,j) = 0;
			/* Determine the total count per processor per IRQ */
			value_now = (unsigned long) kstat_cpu(i).irqs[j];

			/* Determine the activity per processor per IRQ */
			delta = value_now - LAST_CPU_IRQ(i,j);

			/* Update last_cpu_irq[][] for the next time */
			LAST_CPU_IRQ(i,j) = value_now;

			/* Ignore IRQs whose rate is less than the clock */
			if (delta < useful_load_threshold)
				continue;
			/* update the load for the processor or package total */
			IRQ_DELTA(package_index,j) += delta;

			/* Keep track of the higher numbered sibling as well */
			if (i != package_index)
				CPU_IRQ(i) += delta;
			/*
			 * We have sibling A and sibling B in the package
			 *
			 * cpu_irq[A] = load for cpu A + load for cpu B
			 * cpu_irq[B] = load for cpu B
			 */
			CPU_IRQ(package_index) += delta;
		}
	}
	/* Find the least loaded processor package */
	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_online(i))
			continue;
		if (i != CPU_TO_PACKAGEINDEX(i))
			continue;
		if (min_cpu_irq > CPU_IRQ(i)) {
			min_cpu_irq = CPU_IRQ(i);
			min_loaded = i;
		}
	}
	max_cpu_irq = ULONG_MAX;

tryanothercpu:
	/* Look for heaviest loaded processor.
	 * We may come back to get the next heaviest loaded processor.
	 * Skip processors with trivial loads.
	 */
	tmp_cpu_irq = 0;
	tmp_loaded = -1;
	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_online(i))
			continue;
		if (i != CPU_TO_PACKAGEINDEX(i))
			continue;
		if (max_cpu_irq <= CPU_IRQ(i))
			continue;
		if (tmp_cpu_irq < CPU_IRQ(i)) {
			tmp_cpu_irq = CPU_IRQ(i);
			tmp_loaded = i;
		}
	}

	if (tmp_loaded == -1) {
		/* In the case of small number of heavy interrupt sources,
		 * loading some of the cpus too much. We use Ingo's original
		 * approach to rotate them around.
		 */
		if (!first_attempt && imbalance >= useful_load_threshold) {
			rotate_irqs_among_cpus(useful_load_threshold);
			return;
		}
		goto not_worth_the_effort;
	}

	first_attempt = 0;		/* heaviest search */
	max_cpu_irq = tmp_cpu_irq;	/* load */
	max_loaded = tmp_loaded;	/* processor */
	imbalance = (max_cpu_irq - min_cpu_irq) / 2;

	Dprintk("max_loaded cpu = %d\n", max_loaded);
	Dprintk("min_loaded cpu = %d\n", min_loaded);
	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
	Dprintk("load imbalance = %lu\n", imbalance);

	/* if imbalance is less than approx 10% of max load, then
	 * observe diminishing returns action. - quit
	 */
	if (imbalance < (max_cpu_irq >> 3)) {
		Dprintk("Imbalance too trivial\n");
		goto not_worth_the_effort;
	}

tryanotherirq:
	/* if we select an IRQ to move that can't go where we want, then
	 * see if there is another one to try.
	 */
	move_this_load = 0;
	selected_irq = -1;
	for (j = 0; j < NR_IRQS; j++) {
		/* Is this an active IRQ? */
		if (!irq_desc[j].action)
			continue;
		if (imbalance <= IRQ_DELTA(max_loaded,j))
			continue;