📄 cpu_idle.c
/*
 * cpu_idle - xen idle state module derived from Linux
 *            drivers/acpi/processor_idle.c &
 *            arch/x86/kernel/acpi/cstate.c
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *                      - Added processor hotplug support
 *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *                      - Added support for C3 on SMP
 *  Copyright (C) 2007, 2008 Intel Corporation
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <xen/config.h>
#include <xen/errno.h>
#include <xen/lib.h>
#include <xen/types.h>
#include <xen/acpi.h>
#include <xen/smp.h>
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
#include <asm/cache.h>
#include <asm/io.h>
#include <asm/hpet.h>
#include <asm/processor.h>
#include <public/platform.h>
#include <public/sysctl.h>

#define DEBUG_PM_CX

#define US_TO_PM_TIMER_TICKS(t)     ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
#define C2_OVERHEAD         4   /* 1us (3.579 ticks per us) */
#define C3_OVERHEAD         4   /* 1us (3.579 ticks per us) */

#define ACPI_PROCESSOR_MAX_POWER        8
#define ACPI_PROCESSOR_MAX_C2_LATENCY   100
#define ACPI_PROCESSOR_MAX_C3_LATENCY   1000

static void (*lapic_timer_off)(void);
static void (*lapic_timer_on)(void);

extern u32 pmtmr_ioport;
extern void (*pm_idle) (void);

static void (*pm_idle_save) (void) __read_mostly;
unsigned int max_cstate __read_mostly = 2;
integer_param("max_cstate", max_cstate);
/*
 * bm_history -- bit-mask with a bit per jiffy of bus-master activity
 * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
 *  800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
 *  100 HZ: 0x0000000F:  4 jiffies = 40ms
 * reduce history for more aggressive entry into C3
 */
unsigned int bm_history __read_mostly =
    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
integer_param("bm_history", bm_history);
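
/*
 * Illustrative helper (an assumption, not in the original file): the same
 * conversion US_TO_PM_TIMER_TICKS performs, written out as a function.
 * The ACPI PM timer runs at PM_TIMER_FREQUENCY (3579545) Hz, i.e. roughly
 * 3.58 ticks per microsecond, which is also where the 4-tick (~1us)
 * C2_OVERHEAD/C3_OVERHEAD constants above come from.
 */
static inline u32 example_us_to_pm_ticks(u32 us)
{
    /* e.g. 100us -> (100 * 3579) / 1000 = 357 ticks */
    return (us * (PM_TIMER_FREQUENCY / 1000)) / 1000;
}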

struct acpi_processor_cx;

struct acpi_processor_cx_policy
{
    u32 count;
    struct acpi_processor_cx *state;
    struct
    {
        u32 time;
        u32 ticks;
        u32 count;
        u32 bm;
    } threshold;
};

struct acpi_processor_cx
{
    u8 valid;
    u8 type;
    u32 address;
    u8 space_id;
    u32 latency;
    u32 latency_ticks;
    u32 power;
    u32 usage;
    u64 time;
    struct acpi_processor_cx_policy promotion;
    struct acpi_processor_cx_policy demotion;
};

struct acpi_processor_flags
{
    u8 bm_control:1;
    u8 bm_check:1;
    u8 has_cst:1;
    u8 power_setup_done:1;
    u8 bm_rld_set:1;
};

struct acpi_processor_power
{
    struct acpi_processor_flags flags;
    struct acpi_processor_cx *state;
    s_time_t bm_check_timestamp;
    u32 default_state;
    u32 bm_activity;
    u32 count;
    struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
};

static struct acpi_processor_power processor_powers[NR_CPUS];

static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
{
    uint32_t i;

    printk("==cpu%d==\n", cpu);
    printk("active state:\t\tC%d\n",
           (power->state) ? power->state->type : -1);
    printk("max_cstate:\t\tC%d\n", max_cstate);
    printk("bus master activity:\t%08x\n", power->bm_activity);
    printk("states:\n");

    for ( i = 1; i < power->count; i++ )
    {
        /* Guard against a NULL active state when marking the '*' entry. */
        printk((power->state &&
                (power->states[i].type == power->state->type)) ? " *" : "  ");
        printk("C%d:\t\t", i);
        printk("type[C%d] ", power->states[i].type);
        if ( power->states[i].promotion.state )
            printk("promotion[C%d] ", power->states[i].promotion.state->type);
        else
            printk("promotion[--] ");
        if ( power->states[i].demotion.state )
            printk("demotion[C%d] ", power->states[i].demotion.state->type);
        else
            printk("demotion[--] ");
        printk("latency[%03d]\n", power->states[i].latency);
        printk("\t\t\t");
        printk("usage[%08d] ", power->states[i].usage);
        printk("duration[%"PRId64"]\n", power->states[i].time);
    }
}

static void dump_cx(unsigned char key)
{
    for ( int i = 0; i < num_online_cpus(); i++ )
        print_acpi_power(i, &processor_powers[i]);
}

static int __init cpu_idle_key_init(void)
{
    register_keyhandler('c', dump_cx, "dump cx structures");
    return 0;
}
__initcall(cpu_idle_key_init);

static inline u32 ticks_elapsed(u32 t1, u32 t2)
{
    if ( t2 >= t1 )
        return (t2 - t1);
    else if ( !(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER) )
        return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
    else
        return ((0xFFFFFFFF - t1) + t2);
}
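
/*
 * Worked example (illustrative, not part of the original file): on a
 * 24-bit PM timer the counter wraps at 2^24, so a sample taken just
 * before the wrap can be numerically larger than one taken just after:
 *
 *   t1 = 0x00FFFFF0, t2 = 0x00000010
 *   ticks_elapsed(t1, t2)
 *       = ((0x00FFFFFF - 0x00FFFFF0) + 0x00000010) & 0x00FFFFFF
 *       = 0x1F ticks
 *
 * When the FADT advertises a 32-bit timer (ACPI_FADT_32BIT_TIMER), the
 * final branch applies the same idea over the full 32-bit range.
 */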

static void acpi_processor_power_activate(struct acpi_processor_power *power,
                                          struct acpi_processor_cx *new)
{
    struct acpi_processor_cx *old;

    if ( !power || !new )
        return;

    old = power->state;

    if ( old )
        old->promotion.count = 0;
    new->demotion.count = 0;

    /* Cleanup from old state. */
    if ( old )
    {
        switch ( old->type )
        {
        case ACPI_STATE_C3:
            /* Disable bus master reload */
            if ( new->type != ACPI_STATE_C3 && power->flags.bm_check )
                acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
            break;
        }
    }

    /* Prepare to use new state (old may be NULL on first activation). */
    switch ( new->type )
    {
    case ACPI_STATE_C3:
        /* Enable bus master reload */
        if ( (!old || old->type != ACPI_STATE_C3) && power->flags.bm_check )
            acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
        break;
    }

    power->state = new;

    return;
}

static void acpi_safe_halt(void)
{
    smp_mb__after_clear_bit();
    safe_halt();
}

#define MWAIT_ECX_INTERRUPT_BREAK   (0x1)

static void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
    __monitor((void *)current, 0, 0);
    smp_mb();
    __mwait(eax, ecx);
}

static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
{
    mwait_idle_with_hints(cx->address, MWAIT_ECX_INTERRUPT_BREAK);
}

static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
    if ( cx->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE )
    {
        /* Call into architectural FFH based C-state */
        acpi_processor_ffh_cstate_enter(cx);
    }
    else
    {
        int unused;
        /* IO port based C-state */
        inb(cx->address);
        /* Dummy wait op - must do something useless after P_LVL2 read
           because chipsets cannot guarantee that STPCLK# signal
           gets asserted in time to freeze execution properly. */
        unused = inl(pmtmr_ioport);
    }
}

static atomic_t c3_cpu_count;
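
/*
 * Usage sketch (illustrative only, not part of the original file): how a
 * caller would enter a hardware C-state through the helpers above.  The
 * MWAIT hint 0x10 (conventionally a C2 request: bits 7-4 select the
 * C-state, bits 3-0 a sub-state) is an assumed value for this example;
 * real hints come from the BIOS _CST objects and are stored in
 * cx->address for FFH-based states.
 */
static void example_mwait_c2(void)
{
    local_irq_disable();
    /* MWAIT_ECX_INTERRUPT_BREAK: wake on interrupt even with IF clear. */
    mwait_idle_with_hints(0x10, MWAIT_ECX_INTERRUPT_BREAK);
    local_irq_enable();
}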

static void acpi_processor_idle(void)
{
    struct acpi_processor_power *power = NULL;
    struct acpi_processor_cx *cx = NULL;
    struct acpi_processor_cx *next_state = NULL;
    int sleep_ticks = 0;
    u32 t1, t2 = 0;

    power = &processor_powers[smp_processor_id()];

    /*
     * Interrupts must be disabled during bus mastering calculations and
     * for C2/C3 transitions.
     */
    local_irq_disable();

    if ( softirq_pending(smp_processor_id()) )
    {
        local_irq_enable();
        return;
    }

    cx = power->state;
    if ( !cx )
    {
        if ( pm_idle_save )
        {
            printk(XENLOG_DEBUG "call pm_idle_save()\n");
            pm_idle_save();
        }
        else
        {
            printk(XENLOG_DEBUG "call acpi_safe_halt()\n");
            acpi_safe_halt();
        }
        return;
    }

    /*
     * Check BM Activity
     * -----------------
     * Check for bus mastering activity (if required), record, and check
     * for demotion.
     */
    if ( power->flags.bm_check )
    {
        u32 bm_status = 0;
        unsigned long diff = (NOW() - power->bm_check_timestamp) >> 23;

        if ( diff > 31 )
            diff = 31;

        power->bm_activity <<= diff;

        acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
        if ( bm_status )
        {
            power->bm_activity |= 0x1;
            acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
        }
        /*
         * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
         * the true state of bus mastering activity; forcing us to
         * manually check the BMIDEA bit of each IDE channel.
         */
        /*else if ( errata.piix4.bmisx )
        {
            if ( (inb_p(errata.piix4.bmisx + 0x02) & 0x01) ||
                 (inb_p(errata.piix4.bmisx + 0x0A) & 0x01) )
                pr->power.bm_activity |= 0x1;
        }*/

        power->bm_check_timestamp = NOW();

        /*
         * If bus mastering is or was active this jiffy, demote
         * to avoid a faulty transition.  Note that the processor
         * won't enter a low-power state during this call (to this
         * function) but should upon the next.
         *
         * TBD: A better policy might be to fallback to the demotion
         *      state (use it for this quantum only) instead of
         *      demoting -- and rely on duration as our sole demotion
         *      qualification.  This may, however, introduce DMA
         *      issues (e.g. floppy DMA transfer overrun/underrun).
         */
        if ( (power->bm_activity & 0x1) && cx->demotion.threshold.bm )
        {
            local_irq_enable();
            next_state = cx->demotion.state;
            goto end;
        }
    }

    /*
     * Sleep:
     * ------
     * Invoke the current Cx state to put the processor to sleep.
     */
    if ( cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3 )
        smp_mb__after_clear_bit();

    switch ( cx->type )
    {
    case ACPI_STATE_C1:
        /*
         * Invoke C1.
         * Use the appropriate idle routine, the one that would
         * be used without acpi C-states.
         */
        if ( pm_idle_save )
            pm_idle_save();
        else
            acpi_safe_halt();

        /*
         * TBD: Can't get time duration while in C1, as resumes
         *      go to an ISR rather than here.  Need to instrument
         *      base interrupt handler.
         */
        sleep_ticks = 0xFFFFFFFF;
        break;

    case ACPI_STATE_C2:
        /* Get start time (ticks) */
        t1 = inl(pmtmr_ioport);
        /* Invoke C2 */
        acpi_idle_do_entry(cx);
        /* Get end time (ticks) */
        t2 = inl(pmtmr_ioport);

        /* Re-enable interrupts */
        local_irq_enable();
        /* Compute time (ticks) that we were actually asleep */
        sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
        break;

    case ACPI_STATE_C3:
        /*
         * disable bus master
         * bm_check implies we need ARB_DIS
         * !bm_check implies we need cache flush
         * bm_control implies whether we can do ARB_DIS
         *
         * That leaves a case where bm_check is set and bm_control is
         * not set. In that case we cannot do much, we enter C3
         * without doing anything.
         */
        if ( power->flags.bm_check && power->flags.bm_control )
        {
            atomic_inc(&c3_cpu_count);
            if ( atomic_read(&c3_cpu_count) == num_online_cpus() )
            {
                /*
                 * All CPUs are trying to go to C3
                 * Disable bus master arbitration
                 */
                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
            }
        }
        else if ( !power->flags.bm_check )
        {
            /* SMP with no shared cache... Invalidate cache */
            ACPI_FLUSH_CPU_CACHE();
        }

        /*
         * Before invoking C3, be aware that the TSC and APIC timer may be
         * stopped by H/W.  Without careful handling of the TSC/APIC-stop
         * issues, deep C-states can't work correctly.
         */
        /* preparing TSC stop */
        cstate_save_tsc();
        /* preparing APIC stop */
        lapic_timer_off();

        /* Get start time (ticks) */
        t1 = inl(pmtmr_ioport);
        /* Invoke C3 */
        acpi_idle_do_entry(cx);
        /* Get end time (ticks) */
        t2 = inl(pmtmr_ioport);

        /* recovering TSC */
        cstate_restore_tsc();

        if ( power->flags.bm_check && power->flags.bm_control )
        {
            /* Enable bus master arbitration */
            atomic_dec(&c3_cpu_count);
            acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
        }

        /* Re-enable interrupts */
        local_irq_enable();
        /* recovering APIC */
        lapic_timer_on();
        /* Compute time (ticks) that we were actually asleep */
        sleep_ticks = ticks_elapsed(t1, t2);
        /* Do not account our idle-switching overhead: */
        sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
        break;

    default:
        local_irq_enable();
        return;
    }

    cx->usage++;
    if ( (cx->type != ACPI_STATE_C1) && (sleep_ticks > 0) )
        cx->time += sleep_ticks;

    next_state = power->state;

    /*
     * Promotion?
     * ----------
     * Track the number of longs (time asleep is greater than threshold)
     * and promote when the count threshold is reached.  Note that bus
     * mastering activity may prevent promotions.
     * Do not promote above max_cstate.
     */
    if ( cx->promotion.state &&
         ((cx->promotion.state - power->states) <= max_cstate) )
    {
        if ( sleep_ticks > cx->promotion.threshold.ticks )
        {
            cx->promotion.count++;
            cx->demotion.count = 0;
            if ( cx->promotion.count >= cx->promotion.threshold.count )
            {
                if ( power->flags.bm_check )
                {
                    if ( !(power->bm_activity & cx->promotion.threshold.bm) )
                        next_state = cx->promotion.state;
                }
                else
                    next_state = cx->promotion.state;
            }
        }
    }

    /*
     * Demotion?
     * ---------
     * Track the number of shorts (time asleep is less than time threshold)
     * and demote when the usage threshold is reached.
     */
    if ( cx->demotion.state )
    {
        if ( sleep_ticks < cx->demotion.threshold.ticks )
        {
            cx->demotion.count++;
            cx->promotion.count = 0;
            if ( cx->demotion.count >= cx->demotion.threshold.count )
                next_state = cx->demotion.state;
        }
    }

end:
    /*
     * Demote if current state exceeds max_cstate
     */
    if ( (power->state - power->states) > max_cstate )
    {
        if ( cx->demotion.state )
            next_state = cx->demotion.state;
    }

    /*
     * New Cx State?
     * -------------
     * If we're going to start using a new Cx state we must clean up
     * from the previous and prepare to use the new.
     */
    if ( next_state != power->state )
        acpi_processor_power_activate(power, next_state);
}
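
/*
 * Hook-up sketch (an assumption for illustration; the real registration
 * code lies outside the excerpt above): the driver saves the previous
 * idle handler before installing acpi_processor_idle(), which is why the
 * C1 path and the no-C-state path can fall back to pm_idle_save().
 */
static void __init example_install_idle_handler(void)
{
    if ( pm_idle != acpi_processor_idle )
    {
        pm_idle_save = pm_idle;          /* remember the previous handler */
        pm_idle = acpi_processor_idle;   /* route the idle loop through ACPI */
    }
}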